From e5863fd619825cd3f850602e816608cff311afc1 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Sun, 5 Nov 2023 02:49:02 +0000 Subject: [PATCH] fixed layout --- composable_kernel/CHANGELOG.md => CHANGELOG.md | 0 composable_kernel/CITATION.cff => CITATION.cff | 0 composable_kernel/CMakeLists.txt => CMakeLists.txt | 0 .../CONTRIBUTORS.md => CONTRIBUTORS.md | 0 .../Config.cmake.in => Config.cmake.in | 0 composable_kernel/Dockerfile => Dockerfile | 0 composable_kernel/Jenkinsfile => Jenkinsfile | 0 composable_kernel/LICENSE => LICENSE | 0 composable_kernel/README.md => README.md | 0 .../01_gemm/CMakeLists.txt | 0 .../01_gemm/gemm.cpp | 0 .../02_gemm_add_add_fastgelu/CMakeLists.txt | 0 .../gemm_add_add_fastgelu.cpp | 0 .../gemm_add_add_fastgelu_generic.cpp | 0 .../02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp | 0 .../gemm_add_fastgelu_generic.cpp | 0 .../02_gemm_add_add_fastgelu/gemm_fastgelu.cpp | 0 .../gemm_fastgelu_generic.cpp | 0 .../03_gemm_layernorm/CMakeLists.txt | 0 .../gemm_add_add_layernorm_naive.cpp | 0 .../gemm_add_relu_add_layernorm_welford.cpp | 0 .../04_contraction/CMakeLists.txt | 0 .../04_contraction/contraction_bilinear_fp32.cpp | 0 .../04_contraction/contraction_bilinear_fp64.cpp | 0 .../contraction_g1m2n3k1_add_xdl_fp16.cpp | 0 .../04_contraction/contraction_scale_fp32.cpp | 0 .../04_contraction/contraction_scale_fp64.cpp | 0 .../05_layernorm/CMakeLists.txt | 0 .../05_layernorm/layernorm2d.cpp | 0 .../06_softmax/CMakeLists.txt | 0 .../06_softmax/softmax4d.cpp | 0 .../07_grouped_convnd_fwd/CMakeLists.txt | 0 .../07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp | 0 .../07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp | 0 .../08_fused_attention/CMakeLists.txt | 0 .../08_fused_attention/fused_attention.cpp | 0 .../08_fused_attention/fused_attention_bias.cpp | 0 .../09_quantization/CMakeLists.txt | 0 ...conv2d_fwd_bias_relu_perchannel_quantization.cpp | 0 .../conv2d_fwd_bias_relu_perlayer_quantization.cpp | 0 ...conv2d_fwd_bias_tanh_perchannel_quantization.cpp | 0 .../conv2d_fwd_bias_tanh_perlayer_quantization.cpp | 0 .../conv2d_fwd_perchannel_quantization.cpp | 0 .../conv2d_fwd_perlayer_quantization.cpp | 0 .../09_quantization/gemm_quantization.cpp | 0 .../10_grouped_convnd_bwd_data/CMakeLists.txt | 0 .../grouped_conv2d_bwd_data.cpp | 0 .../grouped_conv3d_bwd_data.cpp | 0 ...rouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp | 0 .../11_grouped_conv_bwd_weight/CMakeLists.txt | 0 .../11_grouped_conv_bwd_weight/common.hpp | 0 .../grouped_conv1d_bwd_weight_fp16.cpp | 0 .../grouped_conv2d_bwd_weight_fp16.cpp | 0 .../grouped_conv3d_bwd_weight_fp16.cpp | 0 .../grouped_conv3d_bwd_weight_fp32.cpp | 0 .../12_elementwise_normalization/CMakeLists.txt | 0 .../elementwise_layernorm2d.cpp | 0 .../13_batchnorm/CMakeLists.txt | 0 .../13_batchnorm/batchnorm_bwd_nhwc.cpp | 0 .../13_batchnorm/batchnorm_fwd_nhwc.cpp | 0 .../13_batchnorm/batchnorm_infer_nhwc.cpp | 0 .../14_instance_id/CMakeLists.txt | 0 .../14_instance_id/batchnorm_fwd_instance_id.cpp | 0 .../15_convnd_bwd_data/CMakeLists.txt | 0 .../15_convnd_bwd_data/common.hpp | 0 .../15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp | 0 .../15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp | 0 .../15_gemm_add_multiply/CMakeLists.txt | 0 .../15_gemm_add_multiply/gemm_add_multiply.cpp | 0 .../15_reduce/CMakeLists.txt | 0 .../15_reduce/reduce_nhwc_c.cpp | 0 .../16_convnd_fwd/CMakeLists.txt | 0 .../16_convnd_fwd/common.hpp | 0 .../16_convnd_fwd/conv3d_fwd_fp16.cpp | 0 .../16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp | 0 .../16_convnd_fwd/conv3d_fwd_fp32.cpp | 0 .../17_grouped_gemm_fastgelu/CMakeLists.txt | 0 .../grouped_gemm_fastgelu.cpp | 0 .../18_groupnorm/CMakeLists.txt | 0 .../18_groupnorm/groupnorm_swish.cpp | 0 .../19_pool/CMakeLists.txt | 0 .../19_pool/avg_pool3d_bwd.cpp | 0 .../19_pool/avg_pool3d_fwd.cpp | 0 .../19_pool/max_pool2d_bwd.cpp | 0 .../19_pool/max_pool2d_fwd.cpp | 0 .../20_splitk_gemm/CMakeLists.txt | 0 .../20_splitk_gemm/splitK_gemm_fp16_f8.cpp | 0 .../21_grouped_gemm_bias/CMakeLists.txt | 0 .../grouped_gemm_fixed_nk_bias_fp16.cpp | 0 .../22_grouped_gemm/CMakeLists.txt | 0 .../22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp | 0 .../22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp | 0 .../22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp | 0 .../22_im2col_col2im/CMakeLists.txt | 0 .../22_im2col_col2im/column_to_image.cpp | 0 .../22_im2col_col2im/image_to_column.cpp | 0 .../CMakeLists.txt | 0 .../client_example => client_example}/README.md | 0 {composable_kernel/cmake => cmake}/Analyzers.cmake | 0 {composable_kernel/cmake => cmake}/ClangTidy.cmake | 0 {composable_kernel/cmake => cmake}/CppCheck.cmake | 0 {composable_kernel/cmake => cmake}/DoxygenDoc.cmake | 0 .../cmake => cmake}/EnableCompilerWarnings.cmake | 0 .../cmake => cmake}/TargetFlags.cmake | 0 {composable_kernel/cmake => cmake}/googletest.cmake | 0 .../dev-requirements.txt => dev-requirements.txt | 0 .../docs => docs}/API_Reference_Guide.rst | 0 .../docs => docs}/Contributors_Guide.rst | 0 .../docs => docs}/Supported_Primitives_Guide.rst | 0 {composable_kernel/docs => docs}/conf.py | 0 .../docs => docs}/data/ck_component.png | Bin {composable_kernel/docs => docs}/data/ck_layer.png | Bin {composable_kernel/docs => docs}/dockerhub.rst | 0 {composable_kernel/docs => docs}/doxygen/Doxyfile | 0 {composable_kernel/docs => docs}/index.rst | 0 {composable_kernel/docs => docs}/license.rst | 0 {composable_kernel/docs => docs}/refs.bib | 0 {composable_kernel/docs => docs}/sphinx/_toc.yml.in | 0 .../docs => docs}/sphinx/requirements.in | 0 .../docs => docs}/sphinx/requirements.txt | 0 .../docs => docs}/tutorial_hello_world.rst | 0 .../example => example}/01_gemm/CMakeLists.txt | 0 .../example => example}/01_gemm/README.md | 0 .../example => example}/01_gemm/common.hpp | 0 .../example => example}/01_gemm/gemm_dl_fp16.cpp | 0 .../example => example}/01_gemm/gemm_dl_fp32.cpp | 0 .../example => example}/01_gemm/gemm_dl_int4.cpp | 0 .../example => example}/01_gemm/gemm_dl_int8.cpp | 0 .../example => example}/01_gemm/gemm_dpp_fp16.cpp | 0 .../example => example}/01_gemm/gemm_wmma_fp16.cpp | 0 .../example => example}/01_gemm/gemm_xdl_bf16.cpp | 0 .../01_gemm/gemm_xdl_bf16_rtn.cpp | 0 .../example => example}/01_gemm/gemm_xdl_fp16.cpp | 0 .../01_gemm/gemm_xdl_fp16_fp8.cpp | 0 .../example => example}/01_gemm/gemm_xdl_fp64.cpp | 0 .../example => example}/01_gemm/gemm_xdl_fp8.cpp | 0 .../01_gemm/gemm_xdl_fp8_bf8.cpp | 0 .../example => example}/01_gemm/gemm_xdl_int4.cpp | 0 .../example => example}/01_gemm/gemm_xdl_int8.cpp | 0 .../01_gemm/gemm_xdl_skip_b_lds_fp16.cpp | 0 .../01_gemm/gemm_xdl_streamk.cpp | 0 .../01_gemm/gemm_xdl_wavelet_fp16.cpp | 0 .../01_gemm/run_gemm_example.inc | 0 .../02_gemm_bilinear/CMakeLists.txt | 0 .../example => example}/02_gemm_bilinear/README.md | 0 .../02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp | 0 .../02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp | 0 .../02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp | 0 .../03_gemm_bias_relu/CMakeLists.txt | 0 .../example => example}/03_gemm_bias_relu/README.md | 0 .../03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp | 0 .../04_gemm_add_add_fastgelu/CMakeLists.txt | 0 .../04_gemm_add_add_fastgelu/README.md | 0 .../04_gemm_add_add_fastgelu/common.hpp | 0 .../gemm_add_add_fastgelu_xdl_bf16.cpp | 0 .../gemm_add_add_fastgelu_xdl_fp16.cpp | 0 .../gemm_add_add_fastgelu_xdl_fp32.cpp | 0 .../gemm_add_add_fastgelu_xdl_int4.cpp | 0 .../gemm_add_add_fastgelu_xdl_int8.cpp | 0 .../run_gemm_add_add_fastgelu_example.inc | 0 .../09_convnd_fwd/CMakeLists.txt | 0 .../example => example}/09_convnd_fwd/README.md | 0 .../09_convnd_fwd/convnd_fwd_common.hpp | 0 .../09_convnd_fwd/convnd_fwd_dl_common.hpp | 0 .../09_convnd_fwd/convnd_fwd_dl_fp16.cpp | 0 .../09_convnd_fwd/convnd_fwd_dl_fp32.cpp | 0 .../09_convnd_fwd/convnd_fwd_dl_int8.cpp | 0 .../09_convnd_fwd/convnd_fwd_xdl_bf16.cpp | 0 .../09_convnd_fwd/convnd_fwd_xdl_fp16.cpp | 0 .../09_convnd_fwd/convnd_fwd_xdl_fp32.cpp | 0 .../09_convnd_fwd/convnd_fwd_xdl_fp64.cpp | 0 .../09_convnd_fwd/convnd_fwd_xdl_int8.cpp | 0 .../09_convnd_fwd/run_convnd_fwd_dl_example.inc | 0 .../09_convnd_fwd/run_convnd_fwd_example.inc | 0 .../CMakeLists.txt | 0 .../common.hpp | 0 .../convnd_fwd_max_xdl_bf16.cpp | 0 .../convnd_fwd_max_xdl_fp16.cpp | 0 .../convnd_fwd_max_xdl_fp32.cpp | 0 .../convnd_fwd_max_xdl_int4.cpp | 0 .../convnd_fwd_max_xdl_int8.cpp | 0 .../run_convnd_fwd_max_example.inc | 0 .../example => example}/12_reduce/CMakeLists.txt | 0 .../example => example}/12_reduce/README.md | 0 .../12_reduce/reduce_blockwise.cpp | 0 .../12_reduce/reduce_blockwise_impl.hpp | 0 .../12_reduce/reduce_blockwise_two_call.cpp | 0 .../12_reduce/reduce_example_common.hpp | 0 .../12_reduce/reduce_multiblock_atomic_add.cpp | 0 .../12_reduce/reduce_multiblock_atomic_add_impl.hpp | 0 .../13_pool2d_fwd/CMakeLists.txt | 0 .../example => example}/13_pool2d_fwd/README.md | 0 .../13_pool2d_fwd/pool2d_fwd_common.hpp | 0 .../13_pool2d_fwd/pool2d_fwd_fp16.cpp | 0 .../13_pool2d_fwd/pool2d_fwd_fp32.cpp | 0 .../14_gemm_quantization/CMakeLists.txt | 0 .../gemm_dl_quantization_int8.cpp | 0 .../gemm_xdl_bias_relu_quantization_int8.cpp | 0 .../gemm_xdl_quantization_int8.cpp | 0 .../15_grouped_gemm/CMakeLists.txt | 0 .../example => example}/15_grouped_gemm/README.md | 0 .../grouped_gemm_multiple_d_dl_fp16.cpp | 0 .../15_grouped_gemm/grouped_gemm_xdl_bf16.cpp | 0 .../grouped_gemm_xdl_fixed_nk_bias_fp16.cpp | 0 .../grouped_gemm_xdl_fixed_nk_fp16.cpp | 0 .../grouped_gemm_xdl_fixed_nk_fp8.cpp | 0 .../15_grouped_gemm/grouped_gemm_xdl_fp16.cpp | 0 .../15_grouped_gemm/grouped_gemm_xdl_fp32.cpp | 0 .../15_grouped_gemm/grouped_gemm_xdl_int4.cpp | 0 .../15_grouped_gemm/grouped_gemm_xdl_int8.cpp | 0 .../grouped_gemm_xdl_splitk_fp16.cpp | 0 .../15_grouped_gemm/run_grouped_gemm_example.inc | 0 .../16_gemm_multi_d_multi_reduces/CMakeLists.txt | 0 .../gemm_add_add_mean_meansquare_xdl_fp16.cpp | 0 .../gemm_add_addsquare_xdl_int8.cpp | 0 .../gemm_max_xdl_bf16.cpp | 0 .../gemm_max_xdl_fp16.cpp | 0 .../gemm_max_xdl_fp32.cpp | 0 .../gemm_max_xdl_int4.cpp | 0 .../gemm_max_xdl_int8.cpp | 0 .../gemm_mean_meansquare_xdl_bf16.cpp | 0 .../gemm_mean_meansquare_xdl_fp16.cpp | 0 .../gemm_mean_meansquare_xdl_fp32.cpp | 0 .../gemm_reduce_xdl_common.hpp | 0 .../17_convnd_bwd_data/CMakeLists.txt | 0 .../17_convnd_bwd_data/README.md | 0 .../17_convnd_bwd_data/convnd_bwd_data_common.hpp | 0 .../17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp | 0 .../17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp | 0 .../18_batched_gemm_reduce/CMakeLists.txt | 0 .../batched_gemm_reduce_xdl_fp16.cpp | 0 .../19_binary_elementwise/CMakeLists.txt | 0 .../broadcast_add_2d_amn_bn.cpp | 0 .../broadcast_add_3d_am_bmnk.cpp | 0 .../19_binary_elementwise/elementwise_add_1d.cpp | 0 .../19_binary_elementwise/elementwise_add_4d.cpp | 0 .../20_grouped_conv_bwd_weight/CMakeLists.txt | 0 .../20_grouped_conv_bwd_weight/common.hpp | 0 .../grouped_conv_bwd_weight_dl_fp16.cpp | 0 .../grouped_conv_bwd_weight_wmma_fp16.cpp | 0 .../grouped_conv_bwd_weight_xdl_bf16.cpp | 0 .../grouped_conv_bwd_weight_xdl_fp16.cpp | 0 ...rouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp | 0 .../run_grouped_conv_bwd_weight_example.inc | 0 .../21_gemm_layernorm/CMakeLists.txt | 0 .../gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp | 0 ...emm_bias_relu_add_layernorm_xdl_welford_fp16.cpp | 0 .../gemm_layernorm_xdl_naive_fp16.cpp | 0 .../gemm_xdl_layernorm_naive_single_kernel_fp16.cpp | 0 .../example => example}/22_cgemm/CMakeLists.txt | 0 .../example => example}/22_cgemm/cgemm_xdl_bf16.cpp | 0 .../22_cgemm/cgemm_xdl_common.hpp | 0 .../example => example}/22_cgemm/cgemm_xdl_fp16.cpp | 0 .../example => example}/22_cgemm/cgemm_xdl_fp32.cpp | 0 .../example => example}/22_cgemm/cgemm_xdl_int4.cpp | 0 .../example => example}/22_cgemm/cgemm_xdl_int8.cpp | 0 .../example => example}/23_softmax/CMakeLists.txt | 0 .../example => example}/23_softmax/README.md | 0 .../23_softmax/softmax_blockwise.cpp | 0 .../24_batched_gemm/CMakeLists.txt | 0 .../24_batched_gemm/batched_gemm_xdl_bf16.cpp | 0 .../24_batched_gemm/batched_gemm_xdl_fp16.cpp | 0 .../24_batched_gemm/batched_gemm_xdl_fp32.cpp | 0 .../24_batched_gemm/batched_gemm_xdl_int4.cpp | 0 .../24_batched_gemm/batched_gemm_xdl_int8.cpp | 0 .../24_batched_gemm/run_batched_gemm_example.inc | 0 .../25_gemm_bias_e_permute/CMakeLists.txt | 0 .../gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp | 0 .../gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp | 0 .../26_contraction/CMakeLists.txt | 0 .../example => example}/26_contraction/README.md | 0 .../contraction_bilinear_xdl_fp32.cpp | 0 .../contraction_bilinear_xdl_fp64.cpp | 0 .../26_contraction/contraction_scale_xdl_fp32.cpp | 0 .../26_contraction/contraction_scale_xdl_fp64.cpp | 0 .../example => example}/27_layernorm/CMakeLists.txt | 0 .../example => example}/27_layernorm/common.hpp | 0 .../27_layernorm/layernorm_fp16.cpp | 0 .../27_layernorm/layernorm_splitk_fp16.cpp | 0 .../27_layernorm/run_layernorm_example.inc | 0 .../28_grouped_gemm_bias_e_permute/CMakeLists.txt | 0 .../grouped_gemm_bias_e_permute_xdl_fp16.cpp | 0 .../29_batched_gemm_bias_e_permute/CMakeLists.txt | 0 .../batched_gemm_bias_e_permute_wmma_fp16.cpp | 0 .../batched_gemm_bias_e_permute_xdl_fp16.cpp | 0 .../30_grouped_conv_fwd_multiple_d/CMakeLists.txt | 0 .../30_grouped_conv_fwd_multiple_d/README.md | 0 .../30_grouped_conv_fwd_multiple_d/common.hpp | 0 .../30_grouped_conv_fwd_multiple_d/common_wmma.hpp | 0 .../grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp | 0 .../grouped_conv_fwd_bias_relu_add_wmma_int8.cpp | 0 .../grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp | 0 .../grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp | 0 .../grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp | 0 .../grouped_conv_fwd_bias_relu_add_xdl_int4.cpp | 0 .../grouped_conv_fwd_bias_relu_add_xdl_int8.cpp | 0 .../grouped_conv_fwd_xdl_fp16.cpp | 0 .../run_grouped_conv_fwd_bias_relu_add_example.inc | 0 ..._grouped_conv_fwd_bias_relu_add_wmma_example.inc | 0 .../run_grouped_conv_fwd_example.inc | 0 .../31_batched_gemm_gemm/CMakeLists.txt | 0 .../batched_gemm_gemm_xdl_bf16.cpp | 0 .../batched_gemm_gemm_xdl_fp16.cpp | 0 .../batched_gemm_gemm_xdl_fp32.cpp | 0 .../batched_gemm_gemm_xdl_int4.cpp | 0 .../batched_gemm_gemm_xdl_int8.cpp | 0 .../run_batched_gemm_gemm_example.inc | 0 .../CMakeLists.txt | 0 ...triangle_scale_softmax_gemm_permute_xdl_fp16.cpp | 0 ...hed_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp | 0 ...hed_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp | 0 .../batched_gemm_scale_softmax_gemm_xdl_bf16.cpp | 0 .../batched_gemm_scale_softmax_gemm_xdl_fp16.cpp | 0 ...triangle_scale_softmax_gemm_permute_xdl_fp16.cpp | 0 ...ped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp | 0 .../run_batched_gemm_scale_softmax_gemm.inc | 0 .../run_batched_gemm_scale_softmax_gemm_permute.inc | 0 .../run_grouped_gemm_scale_softmax_gemm_permute.inc | 0 .../33_multiple_reduce/CMakeLists.txt | 0 .../33_multiple_reduce/README.md | 0 .../33_multiple_reduce/dual_reduce_common.hpp | 0 .../33_multiple_reduce/dual_reduce_multiblock.cpp | 0 .../33_multiple_reduce/dual_reduce_threadwise.cpp | 0 .../example => example}/34_batchnorm/CMakeLists.txt | 0 .../example => example}/34_batchnorm/README.md | 0 .../34_batchnorm/batchnorm_backward_nhwc.cpp | 0 .../34_batchnorm/batchnorm_common.hpp | 0 .../batchnorm_forward_inferring_nhwc.cpp | 0 .../batchnorm_forward_training_nhwc.cpp | 0 .../batchnorm_forward_training_nhwc_obsolete.cpp | 0 .../34_batchnorm/batchnorm_infer_impl.hpp | 0 .../35_splitK_gemm/CMakeLists.txt | 0 .../35_splitK_gemm/run_splitK_gemm_example.inc | 0 .../35_splitK_gemm/splitK_gemm_xdl_bf16.cpp | 0 .../35_splitK_gemm/splitK_gemm_xdl_fp16.cpp | 0 .../35_splitK_gemm/splitK_gemm_xdl_fp32.cpp | 0 .../35_splitK_gemm/splitK_gemm_xdl_int4.cpp | 0 .../35_splitK_gemm/splitK_gemm_xdl_int8.cpp | 0 .../36_sparse_embedding/CMakeLists.txt | 0 .../sparse_embedding3_forward_layernorm.cpp | 0 .../CMakeLists.txt | 0 .../batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp | 0 .../CMakeLists.txt | 0 .../38_grouped_conv_bwd_data_multiple_d/common.hpp | 0 .../grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp | 0 .../grouped_conv_bwd_data_wmma_fp16.cpp | 0 .../grouped_conv_bwd_data_xdl_fp16.cpp | 0 .../run_grouped_conv_bwd_data_bias_relu_example.inc | 0 .../run_grouped_conv_bwd_data_example.inc | 0 .../example => example}/39_permute/CMakeLists.txt | 0 .../example => example}/39_permute/common.hpp | 0 .../39_permute/permute_1xHxW_fp16.cpp | 0 .../39_permute/permute_HxWx4_fp16.cpp | 0 .../39_permute/permute_NxHxW_fp16.cpp | 0 .../39_permute/run_permute_bundle_example.inc | 0 .../39_permute/run_permute_element_example.inc | 0 .../40_conv2d_fwd_quantization/CMakeLists.txt | 0 .../40_conv2d_fwd_quantization/common.hpp | 0 ...wd_dl_bias_relu_perchannel_quantization_int8.cpp | 0 ..._fwd_dl_bias_relu_perlayer_quantization_int8.cpp | 0 ...wd_dl_bias_tanh_perchannel_quantization_int8.cpp | 0 ..._fwd_dl_bias_tanh_perlayer_quantization_int8.cpp | 0 .../conv2d_fwd_dl_perchannel_quantization_int8.cpp | 0 .../conv2d_fwd_dl_perlayer_quantization_int8.cpp | 0 ...d_xdl_bias_relu_perchannel_quantization_int8.cpp | 0 ...fwd_xdl_bias_relu_perlayer_quantization_int8.cpp | 0 .../conv2d_fwd_xdl_perchannel_quantization_int8.cpp | 0 .../conv2d_fwd_xdl_perlayer_quantization_int8.cpp | 0 ...v2d_fwd_bias_perchannel_quantization_example.inc | 0 ...onv2d_fwd_bias_perlayer_quantization_example.inc | 0 ...n_conv2d_fwd_perchannel_quantization_example.inc | 0 ...run_conv2d_fwd_perlayer_quantization_example.inc | 0 .../41_grouped_conv_conv_fwd/CMakeLists.txt | 0 .../grouped_conv_conv_fwd_xdl_bf16.cpp | 0 .../grouped_conv_conv_fwd_xdl_fp16.cpp | 0 .../grouped_conv_conv_fwd_xdl_fp32.cpp | 0 .../grouped_conv_conv_fwd_xdl_int4.cpp | 0 .../grouped_conv_conv_fwd_xdl_int8.cpp | 0 .../run_grouped_conv_conv_fwd_example.inc | 0 .../example => example}/42_groupnorm/CMakeLists.txt | 0 .../example => example}/42_groupnorm/common.hpp | 0 .../42_groupnorm/groupnorm_sigmoid_mul_fp16.cpp | 0 .../42_groupnorm/groupnorm_splitk_fp16.cpp | 0 .../42_groupnorm/groupnorm_swish_fp16.cpp | 0 .../42_groupnorm/run_groupnorm_example.inc | 0 .../43_splitk_gemm_bias_e_permute/CMakeLists.txt | 0 .../splitk_gemm_bias_e_permute_xdl_fp16.cpp | 0 .../splitk_gemm_bias_e_permute_xdl_fp32.cpp | 0 .../44_elementwise_permute/CMakeLists.txt | 0 .../elementwise_permute_4D_fp16.cpp | 0 .../elementwise_permute_4D_fp16_2d.cpp | 0 .../45_elementwise_normalization/CMakeLists.txt | 0 .../elementwise_layernorm_blockwise.cpp | 0 .../46_gemm_add_multiply/CMakeLists.txt | 0 .../46_gemm_add_multiply/README.md | 0 .../46_gemm_add_multiply/common.hpp | 0 .../gemm_add_multiply_dl_fp16.cpp | 0 .../gemm_add_multiply_xdl_fp16.cpp | 0 .../run_gemm_add_multiply_example.inc | 0 .../CMakeLists.txt | 0 .../gemm_bias_softmax_gemm_permute.cpp | 0 .../48_pool3d_fwd/CMakeLists.txt | 0 .../48_pool3d_fwd/pool3d_fwd_common.hpp | 0 .../48_pool3d_fwd/pool3d_fwd_fp16.cpp | 0 .../49_maxpool2d_bwd/CMakeLists.txt | 0 .../49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp | 0 .../49_maxpool2d_bwd/maxpool2d_bwd_common.hpp | 0 .../49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp | 0 .../49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp | 0 .../50_put_element/CMakeLists.txt | 0 .../50_put_element/put_element_fp16.cpp | 0 .../51_avgpool3d_bwd/CMakeLists.txt | 0 .../51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp | 0 .../51_avgpool3d_bwd/avgpool3d_bwd_common.hpp | 0 .../51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp | 0 .../51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp | 0 .../52_im2col_col2im/CMakeLists.txt | 0 .../52_im2col_col2im/column_to_image_f32.cpp | 0 .../example => example}/52_im2col_col2im/common.hpp | 0 .../52_im2col_col2im/image_to_column_f32.cpp | 0 .../53_gemv_splitk/CMakeLists.txt | 0 .../example => example}/53_gemv_splitk/README.md | 0 .../example => example}/53_gemv_splitk/common.hpp | 0 .../53_gemv_splitk/gemv_splitk_fp16.cpp | 0 .../53_gemv_splitk/run_gemv_splitk_example.inc | 0 .../54_tall_and_skinny_gemm_splitk/CMakeLists.txt | 0 .../54_tall_and_skinny_gemm_splitk/README.md | 0 .../54_tall_and_skinny_gemm_splitk/common.hpp | 0 .../run_tall_and_skinny_gemm_splitk_example.inc | 0 .../tall_and_skinny_gemm_splitk_fp16.cpp | 0 .../60_gemm_multi_ABD/CMakeLists.txt | 0 .../60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp | 0 .../61_contraction_multi_ABD/CMakeLists.txt | 0 .../contraction_multi_ABD_xdl_fp16.cpp | 0 .../62_conv_fwd_activ/CMakeLists.txt | 0 .../62_conv_fwd_activ/convnd_fwd_activ_common.hpp | 0 .../62_conv_fwd_activ/convnd_fwd_xdl_abs_fp16.cpp | 0 .../convnd_fwd_xdl_clippedrelu_fp16.cpp | 0 .../62_conv_fwd_activ/convnd_fwd_xdl_elu_fp16.cpp | 0 .../convnd_fwd_xdl_leakyrelu_fp16.cpp | 0 .../62_conv_fwd_activ/convnd_fwd_xdl_pow_fp16.cpp | 0 .../62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp | 0 .../convnd_fwd_xdl_sigmoid_fp16.cpp | 0 .../convnd_fwd_xdl_softrelu_fp16.cpp | 0 .../62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp | 0 .../run_convnd_fwd_activ_example.inc | 0 .../example => example}/CMakeLists.txt | 0 {composable_kernel/include => include}/ck/ck.hpp | 0 .../include => include}/ck/config.h.in | 0 .../ck/host_utility/device_prop.hpp | 0 .../ck/host_utility/hip_check_error.hpp | 0 .../include => include}/ck/host_utility/io.hpp | 0 .../ck/host_utility/kernel_launch.hpp | 0 .../ck/host_utility/stream_utility.hpp | 0 ...olution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp | 0 .../include => include}/ck/stream_config.hpp | 0 .../include => include}/ck/tensor/static_tensor.hpp | 0 .../ck/tensor_description/cluster_descriptor.hpp | 0 .../ck/tensor_description/multi_index_transform.hpp | 0 .../multi_index_transform_helper.hpp | 0 .../ck/tensor_description/tensor_adaptor.hpp | 0 .../ck/tensor_description/tensor_descriptor.hpp | 0 .../tensor_description/tensor_descriptor_helper.hpp | 0 .../tensor_space_filling_curve.hpp | 0 .../gpu/block/blockwise_gemm_dl_v2r3.hpp | 0 .../gpu/block/blockwise_gemm_dlops_v2r2.hpp | 0 .../gpu/block/blockwise_gemm_dlops_v3.hpp | 0 .../gpu/block/blockwise_gemm_dpp.hpp | 0 .../gpu/block/blockwise_gemm_wmma.hpp | 0 .../gpu/block/blockwise_gemm_xdlops.hpp | 0 .../gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp | 0 .../gpu/block/blockwise_softmax.hpp | 0 .../block/blockwise_tensor_slice_transfer_v5r1.hpp | 0 .../gpu/block/blockwise_welford.hpp | 0 .../gpu/block/reduction_functions_blockwise.hpp | 0 .../thread_group_tensor_slice_transfer_v4r1.hpp | 0 .../thread_group_tensor_slice_transfer_v6r1.hpp | 0 .../thread_group_tensor_slice_transfer_v6r1r2.hpp | 0 .../thread_group_tensor_slice_transfer_v6r2.hpp | 0 .../thread_group_tensor_slice_transfer_v6r3.hpp | 0 .../block/thread_group_tensor_slice_transfer_v7.hpp | 0 .../thread_group_tensor_slice_transfer_v7r2.hpp | 0 .../gpu/device/conv_tensor_rearrange_op.hpp | 0 .../convolution_backward_data_specialization.hpp | 0 .../convolution_backward_weight_specialization.hpp | 0 .../device/convolution_forward_specialization.hpp | 0 .../gpu/device/device_avgpool_bwd.hpp | 0 .../ck/tensor_operation/gpu/device/device_base.hpp | 0 .../device_batched_contraction_multiple_d.hpp | 0 .../gpu/device/device_batched_gemm.hpp | 0 .../gpu/device/device_batched_gemm_e_permute.hpp | 0 .../gpu/device/device_batched_gemm_gemm.hpp | 0 .../gpu/device/device_batched_gemm_multi_d.hpp | 0 ...vice_batched_gemm_multiple_d_gemm_multiple_d.hpp | 0 .../gpu/device/device_batched_gemm_softmax_gemm.hpp | 0 .../device_batched_gemm_softmax_gemm_permute.hpp | 0 .../gpu/device/device_batchnorm_backward.hpp | 0 .../gpu/device/device_batchnorm_forward.hpp | 0 .../gpu/device/device_batchnorm_infer.hpp | 0 .../ck/tensor_operation/gpu/device/device_cgemm.hpp | 0 .../gpu/device/device_contraction_multiple_abd.hpp | 0 .../gpu/device/device_contraction_multiple_d.hpp | 0 .../gpu/device/device_conv_bwd_data.hpp | 0 .../tensor_operation/gpu/device/device_conv_fwd.hpp | 0 .../gpu/device/device_conv_fwd_bias_activation.hpp | 0 .../device/device_conv_fwd_bias_activation_add.hpp | 0 .../gpu/device/device_conv_tensor_rearrange.hpp | 0 .../gpu/device/device_elementwise.hpp | 0 .../gpu/device/device_elementwise_normalization.hpp | 0 .../ck/tensor_operation/gpu/device/device_gemm.hpp | 0 .../gpu/device/device_gemm_bias_e_permute.hpp | 0 .../gpu/device/device_gemm_multiple_abd.hpp | 0 .../gpu/device/device_gemm_multiple_d.hpp | 0 .../gpu/device/device_gemm_multiple_d_layernorm.hpp | 0 .../device/device_gemm_multiple_d_multiple_r.hpp | 0 .../gpu/device/device_gemm_reduce.hpp | 0 .../gpu/device/device_gemm_splitk.hpp | 0 .../gpu/device/device_gemm_streamk.hpp | 0 .../device_grouped_contraction_multiple_d.hpp | 0 .../device_grouped_conv_bwd_data_multiple_d.hpp | 0 .../gpu/device/device_grouped_conv_bwd_weight.hpp | 0 .../gpu/device/device_grouped_conv_fwd.hpp | 0 .../device/device_grouped_conv_fwd_multiple_d.hpp | 0 .../gpu/device/device_grouped_gemm.hpp | 0 .../gpu/device/device_grouped_gemm_fixed_nk.hpp | 0 .../device_grouped_gemm_softmax_gemm_permute.hpp | 0 .../gpu/device/device_grouped_gemm_splitk.hpp | 0 .../gpu/device/device_max_pool_bwd.hpp | 0 .../gpu/device/device_multiple_reduce.hpp | 0 .../gpu/device/device_normalization.hpp | 0 .../tensor_operation/gpu/device/device_permute.hpp | 0 .../tensor_operation/gpu/device/device_pool_fwd.hpp | 0 .../gpu/device/device_put_element.hpp | 0 .../tensor_operation/gpu/device/device_reduce.hpp | 0 .../tensor_operation/gpu/device/device_softmax.hpp | 0 .../device/device_splitk_contraction_multiple_d.hpp | 0 .../gpu/device/device_tall_and_skinny_gemm.hpp | 0 .../gpu/device/gemm_specialization.hpp | 0 .../impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp | 0 ...batched_contraction_multiple_d_wmma_cshuffle.hpp | 0 ..._batched_contraction_multiple_d_xdl_cshuffle.hpp | 0 .../impl/device_batched_gemm_e_permute_xdl.hpp | 0 .../impl/device_batched_gemm_gemm_xdl_cshuffle.hpp | 0 .../device/impl/device_batched_gemm_multi_d_xdl.hpp | 0 .../impl/device_batched_gemm_multiple_d_dl.hpp | 0 ...gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp | 0 .../device_batched_gemm_reduce_xdl_cshuffle.hpp | 0 ...tched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp | 0 ...evice_batched_gemm_softmax_gemm_xdl_cshuffle.hpp | 0 .../gpu/device/impl/device_batched_gemm_xdl.hpp | 0 .../device/impl/device_batchnorm_backward_impl.hpp | 0 .../device/impl/device_batchnorm_forward_impl.hpp | 0 .../impl/device_batchnorm_forward_impl_obsolete.hpp | 0 .../device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp | 0 .../gpu/device/impl/device_column_to_image_impl.hpp | 0 ...device_contraction_multiple_abd_xdl_cshuffle.hpp | 0 .../device_contraction_multiple_d_xdl_cshuffle.hpp | 0 ...backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp | 0 .../device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp | 0 ...c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp | 0 ...xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp | 0 ...vice_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp | 0 .../impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp | 0 .../device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp | 0 .../device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp | 0 .../impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp | 0 .../impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp | 0 .../gpu/device/impl/device_elementwise_2d_impl.hpp | 0 .../gpu/device/impl/device_elementwise_impl.hpp | 0 .../impl/device_elementwise_normalization_impl.hpp | 0 .../device_gemm_bias_add_reduce_xdl_cshuffle.hpp | 0 .../gpu/device/impl/device_gemm_dl.hpp | 0 .../gpu/device/impl/device_gemm_dpp.hpp | 0 .../impl/device_gemm_multiple_abd_xdl_cshuffle.hpp | 0 .../gpu/device/impl/device_gemm_multiple_d_dl.hpp | 0 ...evice_gemm_multiple_d_layernorm_xdl_cshuffle.hpp | 0 ...vice_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp | 0 .../impl/device_gemm_multiple_d_wmma_cshuffle.hpp | 0 .../impl/device_gemm_multiple_d_xdl_cshuffle.hpp | 0 .../device/impl/device_gemm_reduce_xdl_cshuffle.hpp | 0 .../gpu/device/impl/device_gemm_wmma.hpp | 0 .../gpu/device/impl/device_gemm_xdl.hpp | 0 .../gpu/device/impl/device_gemm_xdl_cshuffle.hpp | 0 .../impl/device_gemm_xdl_layernorm_cshuffle.hpp | 0 .../gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp | 0 .../impl/device_gemm_xdl_splitk_c_shuffle.hpp | 0 .../gpu/device/impl/device_gemm_xdl_streamk.hpp | 0 .../impl/device_gemm_xdl_waveletmodel_cshuffle.hpp | 0 ..._grouped_contraction_multiple_d_xdl_cshuffle.hpp | 0 ...ouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp | 0 ...ped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp | 0 .../impl/device_grouped_conv_bwd_weight_dl.hpp | 0 ...device_grouped_conv_bwd_weight_wmma_cshuffle.hpp | 0 .../device_grouped_conv_bwd_weight_xdl_cshuffle.hpp | 0 ...rouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp | 0 .../device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp | 0 ...evice_grouped_conv_fwd_multiple_d_multiple_r.hpp | 0 ..._conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp | 0 ...ce_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp | 0 ...ice_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp | 0 .../gpu/device/impl/device_grouped_conv_utils.hpp | 0 .../impl/device_grouped_gemm_multiple_d_dl.hpp | 0 ...ouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp | 0 .../gpu/device/impl/device_grouped_gemm_xdl.hpp | 0 .../impl/device_grouped_gemm_xdl_fixed_nk.hpp | 0 .../device_grouped_gemm_xdl_splitk_cshuffle.hpp | 0 .../gpu/device/impl/device_image_to_column_impl.hpp | 0 .../gpu/device/impl/device_max_pool_bwd_impl.hpp | 0 .../impl/device_multiple_reduce_multiblock.hpp | 0 .../impl/device_multiple_reduce_threadwise.hpp | 0 .../gpu/device/impl/device_normalization_impl.hpp | 0 .../impl/device_normalization_splitk_impl.hpp | 0 .../gpu/device/impl/device_permute_impl.hpp | 0 .../gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp | 0 .../device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp | 0 .../gpu/device/impl/device_put_element_impl.hpp | 0 .../gpu/device/impl/device_reduce_common.hpp | 0 .../gpu/device/impl/device_reduce_multiblock.hpp | 0 .../gpu/device/impl/device_reduce_threadwise.hpp | 0 .../gpu/device/impl/device_softmax_impl.hpp | 0 .../device_sparse_embeddings_forward_layernorm.hpp | 0 ...e_splitk_contraction_multiple_d_xdl_cshuffle.hpp | 0 .../impl/device_tall_and_skinny_gemm_splitk.hpp | 0 .../gpu/device/masking_specialization.hpp | 0 .../tensor_operation/gpu/device/matrix_padder.hpp | 0 .../gpu/device/reduction_operator_mapping.hpp | 0 .../tensor_operation/gpu/device/tensor_layout.hpp | 0 .../gpu/device/tensor_specialization.hpp | 0 .../tensor_operation/gpu/device/welford_helper.hpp | 0 .../gpu/element/binary_element_wise_operation.hpp | 0 .../gpu/element/element_wise_operation.hpp | 0 .../gpu/element/quantization_operation.hpp | 0 .../gpu/element/unary_element_wise_operation.hpp | 0 .../gridwise_multiblock_batchnorm_forward.hpp | 0 ..._reduce_second_half_batchnorm_backward_final.hpp | 0 .../gridwise_multiblock_welford_first_half.hpp | 0 ...second_half_batchnorm_forward_final_obsolete.hpp | 0 ...ord_second_half_multiblock_reduce_first_half.hpp | 0 .../gpu/grid/block_to_ctile_map.hpp | 0 ...m_multiple_d_welford_first_half_xdl_cshuffle.hpp | 0 .../gridwise_welford_second_half_layernorm2d.hpp | 0 .../gridwise_2d_multiple_reduction_multiblock.hpp | 0 .../gridwise_2d_multiple_reduction_threadwise.hpp | 0 .../gpu/grid/gridwise_2d_reduction_multiblock.hpp | 0 .../gpu/grid/gridwise_2d_reduction_threadwise.hpp | 0 .../gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp | 0 ...m_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp | 0 ...gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp | 0 ...se_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp | 0 ...ridwise_batchnorm_backward_blockwise_welford.hpp | 0 ...gridwise_batchnorm_forward_blockwise_welford.hpp | 0 .../gpu/grid/gridwise_elementwise_1d.hpp | 0 .../gpu/grid/gridwise_elementwise_2d.hpp | 0 ...dwise_elementwise_layernorm_welford_variance.hpp | 0 ...ridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp | 0 .../gpu/grid/gridwise_gemm_dl_multiple_d.hpp | 0 .../gpu/grid/gridwise_gemm_dl_v1r3.hpp | 0 .../tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp | 0 .../gridwise_gemm_multiple_abd_xdl_cshuffle.hpp | 0 ...wise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp | 0 .../grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp | 0 .../grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp | 0 ...gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp | 0 .../gpu/grid/gridwise_gemm_pipeline_selector.hpp | 0 .../gpu/grid/gridwise_gemm_pipeline_v1.hpp | 0 .../gpu/grid/gridwise_gemm_pipeline_v2.hpp | 0 .../gpu/grid/gridwise_gemm_pipeline_v3.hpp | 0 .../grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp | 0 ...ridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp | 0 ...wise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp | 0 .../gpu/grid/gridwise_gemm_waveletmodel.hpp | 0 .../gpu/grid/gridwise_gemm_wmma.hpp | 0 .../gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp | 0 .../gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp | 0 .../gridwise_gemm_xdl_waveletmodel_cshuffle.hpp | 0 .../gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp | 0 .../gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp | 0 .../gpu/grid/gridwise_gemm_xdlops_streamk.hpp | 0 .../gpu/grid/gridwise_gemm_xdlops_v2r3.hpp | 0 .../gpu/grid/gridwise_gemm_xdlops_v2r4.hpp | 0 .../gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp | 0 .../gpu/grid/gridwise_gemm_xdlops_v3r1.hpp | 0 .../gpu/grid/gridwise_gemm_xdlops_v3r2.hpp | 0 .../gpu/grid/gridwise_gemm_xdlops_v3r3.hpp | 0 .../gpu/grid/gridwise_gemv_splitk.hpp | 0 .../tensor_operation/gpu/grid/gridwise_permute.hpp | 0 .../gpu/grid/gridwise_put_element_1d.hpp | 0 .../gpu/grid/gridwise_set_buffer_value.hpp | 0 .../gpu/grid/gridwise_set_multiple_buffer_value.hpp | 0 .../tensor_operation/gpu/grid/gridwise_softmax.hpp | 0 ...gridwise_sparse_embeddings_forward_layernorm.hpp | 0 .../grid/gridwise_tall_and_skinny_gemm_splitk.hpp | 0 .../gpu/grid/gridwise_tensor_rearrange.hpp | 0 .../gridwise_normalization_naive_variance.hpp | 0 .../gridwise_normalization_selector.hpp | 0 .../gridwise_normalization_splitk_1st.hpp | 0 .../gridwise_normalization_splitk_2nd.hpp | 0 .../gridwise_normalization_welford_variance.hpp | 0 .../gpu/thread/reduction_functions_threadwise.hpp | 0 .../gpu/thread/threadwise_contraction_dl.hpp | 0 .../gpu/thread/threadwise_gemm_dlops_v3.hpp | 0 .../gpu/thread/threadwise_tensor_slice_set.hpp | 0 .../gpu/thread/threadwise_tensor_slice_transfer.hpp | 0 .../threadwise_tensor_slice_transfer_v3r1.hpp | 0 .../threadwise_tensor_slice_transfer_v4r1.hpp | 0 .../threadwise_tensor_slice_transfer_v5r1.hpp | 0 .../threadwise_tensor_slice_transfer_v6r1.hpp | 0 .../threadwise_tensor_slice_transfer_v6r1r2.hpp | 0 .../threadwise_tensor_slice_transfer_v6r2.hpp | 0 .../threadwise_tensor_slice_transfer_v6r3.hpp | 0 .../thread/threadwise_tensor_slice_transfer_v7.hpp | 0 .../threadwise_tensor_slice_transfer_v7r2.hpp | 0 .../gpu/thread/threadwise_welford.hpp | 0 .../ck/tensor_operation/gpu/warp/dpp_gemm.hpp | 0 .../ck/tensor_operation/gpu/warp/wmma_gemm.hpp | 0 .../ck/tensor_operation/gpu/warp/xdlops_gemm.hpp | 0 .../transform_contraction_to_gemm.hpp | 0 .../transform_conv_bwd_data_to_gemm_v1.hpp | 0 .../transform_conv_fwd_to_gemm.hpp | 0 .../ck/utility/amd_address_space.hpp | 0 .../ck/utility/amd_buffer_addressing.hpp | 0 .../include => include}/ck/utility/amd_gemm_dpp.hpp | 0 .../ck/utility/amd_inline_asm.hpp | 0 .../ck/utility/amd_wave_read_first_lane.hpp | 0 .../include => include}/ck/utility/amd_wmma.hpp | 0 .../include => include}/ck/utility/amd_xdlops.hpp | 0 .../include => include}/ck/utility/array.hpp | 0 .../ck/utility/array_multi_index.hpp | 0 .../ck/utility/c_style_pointer_cast.hpp | 0 .../ck/utility/common_header.hpp | 0 .../ck/utility/container_element_picker.hpp | 0 .../ck/utility/container_helper.hpp | 0 .../include => include}/ck/utility/data_type.hpp | 0 .../include => include}/ck/utility/debug.hpp | 0 .../ck/utility/dynamic_buffer.hpp | 0 .../include => include}/ck/utility/enable_if.hpp | 0 .../include => include}/ck/utility/f8_utils.hpp | 0 .../include => include}/ck/utility/functional.hpp | 0 .../include => include}/ck/utility/functional2.hpp | 0 .../include => include}/ck/utility/functional3.hpp | 0 .../include => include}/ck/utility/functional4.hpp | 0 .../ck/utility/generic_memory_space_atomic.hpp | 0 .../include => include}/ck/utility/get_id.hpp | 0 .../include => include}/ck/utility/get_shift.hpp | 0 .../include => include}/ck/utility/ignore.hpp | 0 .../ck/utility/inner_product.hpp | 0 .../ck/utility/inner_product_dpp8.hpp | 0 .../ck/utility/integral_constant.hpp | 0 .../include => include}/ck/utility/is_detected.hpp | 0 .../ck/utility/is_known_at_compile_time.hpp | 0 .../ck/utility/loop_scheduler.hpp | 0 .../ck/utility/magic_division.hpp | 0 .../include => include}/ck/utility/math.hpp | 0 .../include => include}/ck/utility/math_v2.hpp | 0 .../include => include}/ck/utility/multi_index.hpp | 0 .../include => include}/ck/utility/number.hpp | 0 .../include => include}/ck/utility/random_gen.hpp | 0 .../ck/utility/reduction_common.hpp | 0 .../ck/utility/reduction_enums.hpp | 0 .../ck/utility/reduction_functions_accumulate.hpp | 0 .../ck/utility/reduction_operator.hpp | 0 .../include => include}/ck/utility/sequence.hpp | 0 .../ck/utility/sequence_helper.hpp | 0 .../include => include}/ck/utility/span.hpp | 0 .../ck/utility/static_buffer.hpp | 0 .../ck/utility/statically_indexed_array.hpp | 0 .../statically_indexed_array_multi_index.hpp | 0 .../ck/utility/synchronization.hpp | 0 .../include => include}/ck/utility/thread_group.hpp | 0 .../ck/utility/transpose_vectors.hpp | 0 .../include => include}/ck/utility/tuple.hpp | 0 .../include => include}/ck/utility/tuple_helper.hpp | 0 .../include => include}/ck/utility/type.hpp | 0 .../include => include}/ck/utility/type_convert.hpp | 0 .../ck/utility/workgroup_barrier.hpp | 0 .../ck/utility/workgroup_synchronization.hpp | 0 .../include => include}/ck/version.h.in | 0 .../library => library}/CMakeLists.txt | 0 .../cpu/reference_avgpool_bwd.hpp | 0 .../cpu/reference_batched_gemm.hpp | 0 .../cpu/reference_batchnorm_backward.hpp | 0 .../cpu/reference_batchnorm_forward.hpp | 0 .../cpu/reference_batchnorm_infer.hpp | 0 .../cpu/reference_cgemm.hpp | 0 .../cpu/reference_column_to_image.hpp | 0 .../cpu/reference_contraction.hpp | 0 .../cpu/reference_conv_bwd_data.hpp | 0 .../cpu/reference_conv_bwd_weight.hpp | 0 .../cpu/reference_conv_fwd.hpp | 0 .../cpu/reference_conv_fwd_bias_activation.hpp | 0 .../cpu/reference_conv_fwd_bias_activation_add.hpp | 0 .../cpu/reference_gemm.hpp | 0 .../cpu/reference_gemm_layernorm.hpp | 0 .../cpu/reference_groupnorm.hpp | 0 .../cpu/reference_image_to_column.hpp | 0 .../cpu/reference_layernorm.hpp | 0 .../cpu/reference_maxpool_bwd.hpp | 0 .../cpu/reference_pool_fwd.hpp | 0 .../cpu/reference_reduce.hpp | 0 .../cpu/reference_softmax.hpp | 0 ...eference_sparse_embedding3_forward_layernorm.hpp | 0 .../gpu/naive_conv_fwd.hpp | 0 .../add_device_operation_instance.hpp | 0 .../device_operation_instance_factory.hpp | 0 .../gpu/avg_pool3d_bwd.hpp | 0 .../tensor_operation_instance/gpu/batched_gemm.hpp | 0 .../gpu/batched_gemm_add_relu_gemm_add.hpp | 0 .../gpu/batched_gemm_bias_permute.hpp | 0 .../gpu/batched_gemm_bias_softmax_gemm_permute.hpp | 0 .../gpu/batched_gemm_gemm.hpp | 0 .../gpu/batched_gemm_multi_d.hpp | 0 .../gpu/batched_gemm_softmax_gemm.hpp | 0 .../gpu/batched_gemm_softmax_gemm_permute.hpp | 0 .../gpu/batchnorm_backward.hpp | 0 .../gpu/batchnorm_forward.hpp | 0 .../gpu/batchnorm_infer.hpp | 0 .../gpu/contraction_bilinear.hpp | 0 .../gpu/contraction_scale.hpp | 0 .../gpu/conv_tensor_rearrange.hpp | 0 .../device_column_to_image_instance.hpp | 0 .../device_image_to_column_instance.hpp | 0 .../gpu/convolution_backward_data.hpp | 0 .../gpu/convolution_forward.hpp | 0 .../gpu/device_elementwise_instance.hpp | 0 .../gpu/device_gemm_mean_squaremean_instance.hpp | 0 .../gpu/elementwise_normalization.hpp | 0 .../library/tensor_operation_instance/gpu/gemm.hpp | 0 .../gpu/gemm_add_add_fastgelu.hpp | 0 .../gpu/gemm_add_fastgelu.hpp | 0 .../gpu/gemm_add_multiply.hpp | 0 .../gpu/gemm_add_relu_add_layernorm.hpp | 0 .../tensor_operation_instance/gpu/gemm_bilinear.hpp | 0 .../tensor_operation_instance/gpu/gemm_fastgelu.hpp | 0 .../gpu/gemm_multiply_add.hpp | 0 .../tensor_operation_instance/gpu/gemm_splitk.hpp | 0 .../tensor_operation_instance/gpu/gemm_streamk.hpp | 0 .../tensor_operation_instance/gpu/gemv_splitk.hpp | 0 .../device_grouped_conv_bwd_data_wmma_instance.hpp | 0 .../device_grouped_conv_bwd_data_xdl_instance.hpp | 0 .../device_grouped_conv_bwd_weight_dl_instance.hpp | 0 ...device_grouped_conv_bwd_weight_wmma_instance.hpp | 0 .../device_grouped_conv_bwd_weight_xdl_instance.hpp | 0 .../device_grouped_conv2d_fwd_wmma_instance.hpp | 0 .../device_grouped_conv_fwd_dl_instance.hpp | 0 .../device_grouped_conv_fwd_wmma_instance.hpp | 0 .../device_grouped_conv_fwd_xdl_instance.hpp | 0 .../gpu/grouped_convolution_backward_data.hpp | 0 .../gpu/grouped_convolution_backward_weight.hpp | 0 .../gpu/grouped_convolution_forward.hpp | 0 .../tensor_operation_instance/gpu/grouped_gemm.hpp | 0 .../gpu/grouped_gemm_bias.hpp | 0 .../gpu/grouped_gemm_fastgelu.hpp | 0 .../gpu/grouped_gemm_fixed_nk.hpp | 0 .../gpu/image_to_column.hpp | 0 .../tensor_operation_instance/gpu/max_pool_bwd.hpp | 0 .../tensor_operation_instance/gpu/normalization.hpp | 0 .../gpu/normalization_swish.hpp | 0 .../tensor_operation_instance/gpu/pool3d_fwd.hpp | 0 .../gpu/quantization/gemm_quantization.hpp | 0 ...olution_bias_forward_perchannel_quantization.hpp | 0 ...nvolution_bias_forward_perlayer_quantization.hpp | 0 ..._convolution_forward_perchannel_quantization.hpp | 0 ...ed_convolution_forward_perlayer_quantization.hpp | 0 .../gpu/reduce/device_reduce_instance.hpp | 0 .../gpu/reduce/device_reduce_instance_blockwise.hpp | 0 ...ce_reduce_instance_blockwise_b16_f32_b16_add.hpp | 0 ...e_reduce_instance_blockwise_b16_f32_b16_amax.hpp | 0 ...ce_reduce_instance_blockwise_b16_f32_b16_avg.hpp | 0 ...ce_reduce_instance_blockwise_b16_f32_b16_max.hpp | 0 ...ce_reduce_instance_blockwise_b16_f32_b16_min.hpp | 0 ..._reduce_instance_blockwise_b16_f32_b16_norm2.hpp | 0 ...e_reduce_instance_blockwise_f16_f16_f16_amax.hpp | 0 ...ce_reduce_instance_blockwise_f16_f16_f16_max.hpp | 0 ...ce_reduce_instance_blockwise_f16_f16_f16_min.hpp | 0 ...ce_reduce_instance_blockwise_f16_f32_f16_add.hpp | 0 ...ce_reduce_instance_blockwise_f16_f32_f16_avg.hpp | 0 ..._reduce_instance_blockwise_f16_f32_f16_norm2.hpp | 0 ...ce_reduce_instance_blockwise_f32_f32_f32_add.hpp | 0 ...e_reduce_instance_blockwise_f32_f32_f32_amax.hpp | 0 ...ce_reduce_instance_blockwise_f32_f32_f32_avg.hpp | 0 ...ce_reduce_instance_blockwise_f32_f32_f32_max.hpp | 0 ...ce_reduce_instance_blockwise_f32_f32_f32_min.hpp | 0 ..._reduce_instance_blockwise_f32_f32_f32_norm2.hpp | 0 ...ce_reduce_instance_blockwise_f32_f64_f32_add.hpp | 0 ...ce_reduce_instance_blockwise_f32_f64_f32_avg.hpp | 0 ..._reduce_instance_blockwise_f32_f64_f32_norm2.hpp | 0 ...ce_reduce_instance_blockwise_f64_f64_f64_add.hpp | 0 ...e_reduce_instance_blockwise_f64_f64_f64_amax.hpp | 0 ...ce_reduce_instance_blockwise_f64_f64_f64_avg.hpp | 0 ...ce_reduce_instance_blockwise_f64_f64_f64_max.hpp | 0 ...ce_reduce_instance_blockwise_f64_f64_f64_min.hpp | 0 ..._reduce_instance_blockwise_f64_f64_f64_norm2.hpp | 0 ...vice_reduce_instance_blockwise_i8_i32_i8_add.hpp | 0 ...vice_reduce_instance_blockwise_i8_i32_i8_avg.hpp | 0 ...vice_reduce_instance_blockwise_i8_i8_i8_amax.hpp | 0 ...evice_reduce_instance_blockwise_i8_i8_i8_max.hpp | 0 ...evice_reduce_instance_blockwise_i8_i8_i8_min.hpp | 0 .../reduce/device_reduce_instance_impl_common.hpp | 0 ...device_reduce_instance_multiblock_atomic_add.hpp | 0 ...stance_multiblock_atomic_add_b16_f32_f32_add.hpp | 0 ...stance_multiblock_atomic_add_b16_f32_f32_avg.hpp | 0 ...stance_multiblock_atomic_add_f16_f32_f32_add.hpp | 0 ...stance_multiblock_atomic_add_f16_f32_f32_avg.hpp | 0 ...stance_multiblock_atomic_add_f32_f32_f32_add.hpp | 0 ...stance_multiblock_atomic_add_f32_f32_f32_avg.hpp | 0 ...stance_multiblock_atomic_add_f32_f64_f32_add.hpp | 0 ...stance_multiblock_atomic_add_f32_f64_f32_avg.hpp | 0 ...stance_multiblock_atomic_add_f64_f64_f64_add.hpp | 0 ...stance_multiblock_atomic_add_f64_f64_f64_avg.hpp | 0 .../reduce/device_reduce_instance_threadwise.hpp | 0 ...e_reduce_instance_threadwise_b16_f32_b16_add.hpp | 0 ..._reduce_instance_threadwise_b16_f32_b16_amax.hpp | 0 ...e_reduce_instance_threadwise_b16_f32_b16_avg.hpp | 0 ...e_reduce_instance_threadwise_b16_f32_b16_max.hpp | 0 ...e_reduce_instance_threadwise_b16_f32_b16_min.hpp | 0 ...reduce_instance_threadwise_b16_f32_b16_norm2.hpp | 0 ..._reduce_instance_threadwise_f16_f16_f16_amax.hpp | 0 ...e_reduce_instance_threadwise_f16_f16_f16_max.hpp | 0 ...e_reduce_instance_threadwise_f16_f16_f16_min.hpp | 0 ...e_reduce_instance_threadwise_f16_f32_f16_add.hpp | 0 ...e_reduce_instance_threadwise_f16_f32_f16_avg.hpp | 0 ...reduce_instance_threadwise_f16_f32_f16_norm2.hpp | 0 ...e_reduce_instance_threadwise_f32_f32_f32_add.hpp | 0 ..._reduce_instance_threadwise_f32_f32_f32_amax.hpp | 0 ...e_reduce_instance_threadwise_f32_f32_f32_avg.hpp | 0 ...e_reduce_instance_threadwise_f32_f32_f32_max.hpp | 0 ...e_reduce_instance_threadwise_f32_f32_f32_min.hpp | 0 ...reduce_instance_threadwise_f32_f32_f32_norm2.hpp | 0 ...e_reduce_instance_threadwise_f32_f64_f32_add.hpp | 0 ...e_reduce_instance_threadwise_f32_f64_f32_avg.hpp | 0 ...reduce_instance_threadwise_f32_f64_f32_norm2.hpp | 0 ...e_reduce_instance_threadwise_f64_f64_f64_add.hpp | 0 ..._reduce_instance_threadwise_f64_f64_f64_amax.hpp | 0 ...e_reduce_instance_threadwise_f64_f64_f64_avg.hpp | 0 ...e_reduce_instance_threadwise_f64_f64_f64_max.hpp | 0 ...e_reduce_instance_threadwise_f64_f64_f64_min.hpp | 0 ...reduce_instance_threadwise_f64_f64_f64_norm2.hpp | 0 ...ice_reduce_instance_threadwise_i8_i32_i8_add.hpp | 0 ...ice_reduce_instance_threadwise_i8_i32_i8_avg.hpp | 0 ...ice_reduce_instance_threadwise_i8_i8_i8_amax.hpp | 0 ...vice_reduce_instance_threadwise_i8_i8_i8_max.hpp | 0 ...vice_reduce_instance_threadwise_i8_i8_i8_min.hpp | 0 .../tensor_operation_instance/gpu/reduce/reduce.hpp | 0 .../tensor_operation_instance/gpu/softmax.hpp | 0 ...evice_softmax_f16_f16_instance_rank3_reduce1.hpp | 0 ...evice_softmax_f16_f16_instance_rank3_reduce2.hpp | 0 ...evice_softmax_f16_f16_instance_rank3_reduce3.hpp | 0 ...evice_softmax_f16_f16_instance_rank4_reduce1.hpp | 0 ...evice_softmax_f16_f16_instance_rank4_reduce2.hpp | 0 ...evice_softmax_f16_f16_instance_rank4_reduce3.hpp | 0 ...evice_softmax_f16_f16_instance_rank4_reduce4.hpp | 0 .../device_softmax_f16_f16_instance_type.hpp | 0 ...evice_softmax_f32_f32_instance_rank3_reduce1.hpp | 0 ...evice_softmax_f32_f32_instance_rank3_reduce2.hpp | 0 ...evice_softmax_f32_f32_instance_rank3_reduce3.hpp | 0 ...evice_softmax_f32_f32_instance_rank4_reduce1.hpp | 0 ...evice_softmax_f32_f32_instance_rank4_reduce2.hpp | 0 ...evice_softmax_f32_f32_instance_rank4_reduce3.hpp | 0 ...evice_softmax_f32_f32_instance_rank4_reduce4.hpp | 0 .../device_softmax_f32_f32_instance_type.hpp | 0 .../gpu/softmax/device_softmax_instance.hpp | 0 .../gpu/tall_and_skinny_gemm_splitk.hpp | 0 .../include/ck/library/utility/algorithm.hpp | 0 .../include/ck/library/utility/check_err.hpp | 0 .../include/ck/library/utility/conv_common.hpp | 0 .../convolution_host_tensor_descriptor_helper.hpp | 0 .../ck/library/utility/convolution_parameter.hpp | 0 .../include/ck/library/utility/device_memory.hpp | 0 .../include/ck/library/utility/fill.hpp | 0 .../include/ck/library/utility/host_common_util.hpp | 0 .../include/ck/library/utility/host_gemm.hpp | 0 .../include/ck/library/utility/host_tensor.hpp | 0 .../ck/library/utility/host_tensor_generator.hpp | 0 .../include/ck/library/utility/iterator.hpp | 0 .../include/ck/library/utility/literals.hpp | 0 .../include/ck/library/utility/numeric.hpp | 0 .../include/ck/library/utility/ranges.hpp | 0 .../tensor_operation_instance/gpu/CMakeLists.txt | 0 .../gpu/avg_pool3d_bwd/CMakeLists.txt | 0 .../avg_pool3d_bwd_ndhwc_instance_common.hpp | 0 .../device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp | 0 .../device_avg_pool3d_bwd_ndhwc_f16_instance.cpp | 0 .../device_avg_pool3d_bwd_ndhwc_f32_instance.cpp | 0 .../gpu/batched_gemm/CMakeLists.txt | 0 ...gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp | 0 ...gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp | 0 ...gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp | 0 ...gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp | 0 ...ed_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp | 0 ...ed_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp | 0 ...ed_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp | 0 ...ed_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp | 0 ...ed_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp | 0 ...ed_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp | 0 ...ed_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp | 0 ...ed_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp | 0 ...gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp | 0 ...gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp | 0 ...gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp | 0 ...gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp | 0 .../batched_gemm_add_relu_gemm_add/CMakeLists.txt | 0 ...fle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp | 0 ...fle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp | 0 .../gpu/batched_gemm_bias_permute/CMakeLists.txt | 0 ...n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp | 0 .../gpu/batched_gemm_gemm/CMakeLists.txt | 0 ...fle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp | 0 ...fle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp | 0 .../gpu/batched_gemm_multi_d/CMakeLists.txt | 0 ..._multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp | 0 ...l_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp | 0 ..._multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp | 0 ...l_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp | 0 ..._multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp | 0 ...l_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp | 0 ..._multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp | 0 ...l_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp | 0 ...emm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp | 0 ...d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp | 0 ...emm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp | 0 ...d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp | 0 ...emm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp | 0 ...d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp | 0 ...emm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp | 0 ...d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp | 0 .../gpu/batched_gemm_reduce/CMakeLists.txt | 0 ...fle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp | 0 ...fle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp | 0 ...fle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp | 0 ...fle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp | 0 .../gpu/batched_gemm_softmax_gemm/CMakeLists.txt | 0 ...fle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp | 0 .../CMakeLists.txt | 0 ...bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp | 0 ...fle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp | 0 ...bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp | 0 ...fle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp | 0 .../gpu/batchnorm/CMakeLists.txt | 0 .../device_batchnorm_backward_bf16_instance.cpp | 0 .../device_batchnorm_backward_f16_instance.cpp | 0 .../device_batchnorm_backward_f32_instance.cpp | 0 .../device_batchnorm_backward_f64_instance.cpp | 0 .../device_batchnorm_forward_bf16_instance.cpp | 0 .../device_batchnorm_forward_f16_instance.cpp | 0 .../device_batchnorm_forward_f32_instance.cpp | 0 .../device_batchnorm_forward_f64_instance.cpp | 0 .../device_batchnorm_infer_bf16_instance.cpp | 0 .../device_batchnorm_infer_f16_instance.cpp | 0 .../device_batchnorm_infer_f32_instance.cpp | 0 .../device_batchnorm_infer_f64_instance.cpp | 0 .../gpu/column_to_image/CMakeLists.txt | 0 .../device_column_to_image_nhwc_1d_instance.cpp | 0 .../device_column_to_image_nhwc_2d_instance.cpp | 0 .../device_column_to_image_nhwc_3d_instance.cpp | 0 .../gpu/contraction_bilinear/CMakeLists.txt | 0 ..._xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp | 0 ..._xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp | 0 ..._xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp | 0 ..._xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp | 0 ..._xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp | 0 ..._xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp | 0 ..._xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp | 0 ..._xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp | 0 .../gpu/contraction_scale/CMakeLists.txt | 0 ...n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp | 0 ...n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp | 0 ...n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp | 0 ...n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp | 0 ...n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp | 0 ...n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp | 0 ...n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp | 0 ...n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp | 0 .../gpu/conv1d_bwd_data/CMakeLists.txt | 0 ...onv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp | 0 ...conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp | 0 ...conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp | 0 ...onv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp | 0 .../gpu/conv2d_bwd_data/CMakeLists.txt | 0 ...nv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp | 0 ...nv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp | 0 ...v2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp | 0 ...2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp | 0 ...v2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp | 0 ...v2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp | 0 ...2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp | 0 .../gpu/conv2d_fwd/CMakeLists.txt | 0 ...wd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp | 0 ..._conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp | 0 ...e_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp | 0 ...e_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp | 0 ..._conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp | 0 .../gpu/conv2d_fwd_bias_relu/CMakeLists.txt | 0 ...huffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp | 0 .../gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt | 0 ...le_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp | 0 .../gpu/conv3d_bwd_data/CMakeLists.txt | 0 ...bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp | 0 ..._bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp | 0 ..._bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp | 0 ...bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp | 0 .../gpu/elementwise/CMakeLists.txt | 0 .../gpu/elementwise/device_normalize_instance.cpp | 0 .../gpu/elementwise_normalization/CMakeLists.txt | 0 ...evice_elementwise_normalization_f16_instance.cpp | 0 .../gpu/gemm/CMakeLists.txt | 0 ...device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp | 0 ...m_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp | 0 ...device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp | 0 ...m_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp | 0 ...device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ...m_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp | 0 ...device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp | 0 ...m_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp | 0 ...device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp | 0 ...device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp | 0 ...device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp | 0 ...device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp | 0 .../device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp | 0 ...gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp | 0 .../device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp | 0 ...gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp | 0 .../device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp | 0 ...gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp | 0 .../device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp | 0 ...gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp | 0 ...evice_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp | 0 ..._dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp | 0 ...evice_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp | 0 ..._dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp | 0 ...evice_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ..._dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp | 0 ...evice_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp | 0 ..._dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp | 0 ...huffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp | 0 ...l_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp | 0 ...l_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp | 0 ...l_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp | 0 ...l_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp | 0 ..._xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp | 0 ..._xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp | 0 ..._xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ..._xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp | 0 ..._xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp | 0 ..._xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp | 0 ..._xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp | 0 ..._xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp | 0 ..._xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp | 0 ..._xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp | 0 ..._xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_instance.cpp | 0 ..._xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp | 0 ...emm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp | 0 ...emm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp | 0 ...emm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp | 0 ...emm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp | 0 .../gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp | 0 .../km_kn_mn_add_instance.cpp | 0 .../km_kn_mn_default_pipeline_v1_instance.cpp | 0 .../km_kn_mn_default_pipeline_v2_instance.cpp | 0 .../km_kn_mn_default_pipeline_v2_opt_instance.cpp | 0 .../km_kn_mn_interwave_pipeline_v1_instance.cpp | 0 ...kn_mn_irregular_default_pipeline_v1_instance.cpp | 0 ...kn_mn_irregular_default_pipeline_v2_instance.cpp | 0 ..._mn_irregular_interwave_pipeline_v1_instance.cpp | 0 .../km_nk_mn_add_instance.cpp | 0 .../km_nk_mn_default_pipeline_v1_instance.cpp | 0 .../km_nk_mn_default_pipeline_v2_instance.cpp | 0 .../km_nk_mn_default_pipeline_v2_opt_instance.cpp | 0 .../km_nk_mn_interwave_pipeline_v1_instance.cpp | 0 ...nk_mn_irregular_default_pipeline_v1_instance.cpp | 0 ...nk_mn_irregular_default_pipeline_v2_instance.cpp | 0 ..._mn_irregular_interwave_pipeline_v1_instance.cpp | 0 .../mk_kn_mn_add_instance.cpp | 0 .../mk_kn_mn_default_pipeline_v1_instance.cpp | 0 .../mk_kn_mn_default_pipeline_v2_instance.cpp | 0 .../mk_kn_mn_default_pipeline_v2_opt_instance.cpp | 0 .../mk_kn_mn_interwave_pipeline_v1_instance.cpp | 0 ...kn_mn_irregular_default_pipeline_v1_instance.cpp | 0 ...kn_mn_irregular_default_pipeline_v2_instance.cpp | 0 ..._mn_irregular_interwave_pipeline_v1_instance.cpp | 0 .../mk_nk_mn_add_instance.cpp | 0 .../mk_nk_mn_default_pipeline_v1_instance.cpp | 0 .../mk_nk_mn_default_pipeline_v2_instance.cpp | 0 .../mk_nk_mn_default_pipeline_v2_opt_instance.cpp | 0 .../mk_nk_mn_interwave_pipeline_v1_instance.cpp | 0 ...nk_mn_irregular_default_pipeline_v1_instance.cpp | 0 ...nk_mn_irregular_default_pipeline_v2_instance.cpp | 0 ..._mn_irregular_interwave_pipeline_v1_instance.cpp | 0 ...evice_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp | 0 ...evice_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp | 0 ...evice_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp | 0 ...evice_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp | 0 ...evice_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp | 0 ...evice_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp | 0 ...evice_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp | 0 ...evice_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp | 0 .../gpu/gemm_add_add_fastgelu/CMakeLists.txt | 0 ..._f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp | 0 ..._f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp | 0 ..._f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp | 0 ..._f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp | 0 .../gpu/gemm_add_fastgelu/CMakeLists.txt | 0 ...shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp | 0 ...shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp | 0 ...shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp | 0 ...shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp | 0 .../gpu/gemm_add_multiply/CMakeLists.txt | 0 ..._f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp | 0 ..._f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp | 0 ..._f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp | 0 ..._f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp | 0 .../gpu/gemm_add_relu_add_layernorm/CMakeLists.txt | 0 ...huffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp | 0 ...huffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp | 0 ...huffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp | 0 ...huffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp | 0 .../gpu/gemm_bias_add_reduce/CMakeLists.txt | 0 ...huffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp | 0 ...huffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp | 0 ...huffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp | 0 ...huffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp | 0 .../gpu/gemm_bilinear/CMakeLists.txt | 0 ...a_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp | 0 ...a_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp | 0 ...a_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp | 0 ...a_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp | 0 ...shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp | 0 ...shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp | 0 ...shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp | 0 ...shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp | 0 .../gpu/gemm_fastgelu/CMakeLists.txt | 0 ..._xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp | 0 ..._xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp | 0 ..._xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ..._xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp | 0 .../gpu/gemm_multiply_add/CMakeLists.txt | 0 ..._f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp | 0 ..._f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp | 0 ...e_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp | 0 ...e_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp | 0 .../gpu/gemm_reduce/CMakeLists.txt | 0 ...huffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp | 0 ...huffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp | 0 ...huffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp | 0 ...huffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp | 0 .../gpu/gemm_splitk/CMakeLists.txt | 0 ...plitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp | 0 ...plitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp | 0 ...plitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp | 0 ...plitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp | 0 ...emm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp | 0 ...emm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp | 0 ...emm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ...emm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp | 0 ...emm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp | 0 ...emm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp | 0 ...emm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp | 0 ...emm_xdl_splitk_f16_fp8_f16_mk_nk_mn_instance.cpp | 0 ...emm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp | 0 ...emm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp | 0 ...emm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp | 0 ...emm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp | 0 ...emm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp | 0 ...emm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp | 0 ...emm_xdl_splitk_fp8_f16_f16_mk_kn_mn_instance.cpp | 0 ...emm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp | 0 .../gpu/gemm_streamk/CMakeLists.txt | 0 ...mm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp | 0 .../gpu/gemv_splitk/CMakeLists.txt | 0 ...ce_gemv_splitk_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ...ce_gemv_splitk_f16_f16_f16_mk_nk_mn_instance.cpp | 0 .../gpu/grouped_conv1d_bwd_weight/CMakeLists.txt | 0 ...d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp | 0 ...1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp | 0 ...1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp | 0 ...d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp | 0 ...1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp | 0 ...1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp | 0 ..._bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp | 0 ...d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp | 0 ...d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp | 0 .../gpu/grouped_conv1d_fwd/CMakeLists.txt | 0 ..._conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp | 0 ...d_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp | 0 ...d_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp | 0 ..._conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp | 0 .../gpu/grouped_conv2d_bwd_data/CMakeLists.txt | 0 ..._wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp | 0 ...bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp | 0 ...a_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp | 0 ..._bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp | 0 ..._wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp | 0 ...bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp | 0 ...a_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp | 0 ..._bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp | 0 ...bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp | 0 ..._bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp | 0 ..._bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp | 0 ...bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp | 0 ..._bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp | 0 ..._bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp | 0 .../gpu/grouped_conv2d_bwd_weight/CMakeLists.txt | 0 ...wd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp | 0 ...bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp | 0 ...bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp | 0 ...wd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp | 0 ...bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp | 0 ...bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp | 0 ...d_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp | 0 ...wd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp | 0 ...wd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp | 0 ...d_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp | 0 ...wd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp | 0 ...wd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp | 0 .../gpu/grouped_conv2d_fwd/CMakeLists.txt | 0 ...nv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp | 0 ...onv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp | 0 ...conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp | 0 ...conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp | 0 ...conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp | 0 ...conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp | 0 ...wd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp | 0 ..._wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp | 0 ...nv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp | 0 ...fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp | 0 ...fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp | 0 ...d_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp | 0 ...onv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp | 0 ..._fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp | 0 ...wd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp | 0 ..._wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp | 0 ...nv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp | 0 ...fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp | 0 ...fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp | 0 ...d_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp | 0 ...onv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp | 0 ..._fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp | 0 ...nv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp | 0 ...onv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp | 0 ...onv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp | 0 ...nv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp | 0 ...onv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp | 0 ...onv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp | 0 .../gpu/grouped_conv3d_bwd_data/CMakeLists.txt | 0 ...ma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp | 0 ..._data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp | 0 ...mma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp | 0 ...d_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp | 0 ...ma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp | 0 ..._data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp | 0 ...mma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp | 0 ...d_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp | 0 ..._data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp | 0 ...d_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp | 0 ...d_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp | 0 ..._data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp | 0 ...d_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp | 0 ...d_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp | 0 ...gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp | 0 .../gpu/grouped_conv3d_bwd_weight/CMakeLists.txt | 0 ...weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp | 0 ..._weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp | 0 ..._weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp | 0 ...weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp | 0 ..._weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp | 0 ..._weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp | 0 ...ma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp | 0 ...eight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp | 0 ...mma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp | 0 ...weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp | 0 ...ma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp | 0 ...eight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp | 0 ...mma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp | 0 ...weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp | 0 ...eight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp | 0 ...weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp | 0 ...weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp | 0 ...eight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp | 0 ...hwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp | 0 ...weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp | 0 ...weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp | 0 .../gpu/grouped_conv3d_fwd/CMakeLists.txt | 0 ...wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp | 0 ...ma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp | 0 ...d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp | 0 ..._wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp | 0 ..._wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp | 0 ...mma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp | 0 ...3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp | 0 ...d_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp | 0 ...wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp | 0 ...ma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp | 0 ...d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp | 0 ..._wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp | 0 ..._wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp | 0 ...mma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp | 0 ...3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp | 0 ...d_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp | 0 ...d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp | 0 ...3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp | 0 ...3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp | 0 ...d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp | 0 ...d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp | 0 ...l_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp | 0 ...3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp | 0 ...3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp | 0 ...d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp | 0 .../gpu/grouped_gemm/CMakeLists.txt | 0 ...ouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp | 0 ...ouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp | 0 ...ouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ...ouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp | 0 ...emm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ...litk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp | 0 ...emm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp | 0 ...litk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp | 0 .../gpu/grouped_gemm_bias/CMakeLists.txt | 0 ..._fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ..._fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp | 0 ..._fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp | 0 ..._fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp | 0 .../gpu/grouped_gemm_fastgelu/CMakeLists.txt | 0 ...m_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp | 0 ...m_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp | 0 ...m_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ...m_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp | 0 .../gpu/grouped_gemm_fixed_nk/CMakeLists.txt | 0 ...m_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ...m_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp | 0 ...m_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp | 0 ...m_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp | 0 ...mm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp | 0 ...mm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp | 0 .../gpu/image_to_column/CMakeLists.txt | 0 .../device_image_to_column_nhwc_1d_instance.cpp | 0 .../device_image_to_column_nhwc_2d_instance.cpp | 0 .../device_image_to_column_nhwc_3d_instance.cpp | 0 .../gpu/max_pool_bwd/CMakeLists.txt | 0 .../device_max_pool_bwd_bf16_instance.cpp | 0 .../device_max_pool_bwd_f16_instance.cpp | 0 .../device_max_pool_bwd_f32_instance.cpp | 0 .../max_pool_bwd/max_pool_bwd_instance_common.hpp | 0 .../gpu/normalization/CMakeLists.txt | 0 .../normalization/device_groupnorm_f16_instance.cpp | 0 .../normalization/device_groupnorm_f32_instance.cpp | 0 ...ice_groupnorm_swish_f16_f32_f32_f16_instance.cpp | 0 .../device_groupnorm_swish_f16_instance.cpp | 0 .../device_groupnorm_swish_f32_instance.cpp | 0 .../device_layernorm2d_f16_instance.cpp | 0 .../device_layernorm2d_f32_instance.cpp | 0 .../device_layernorm4d_f16_instance.cpp | 0 .../device_layernorm4d_f32_instance.cpp | 0 .../normalization/normalization_instance_common.hpp | 0 .../gpu/pool3d_fwd/CMakeLists.txt | 0 .../device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp | 0 .../device_avg_pool3d_fwd_ndhwc_f16_instance.cpp | 0 .../device_avg_pool3d_fwd_ndhwc_f32_instance.cpp | 0 .../device_max_pool3d_fwd_ndhwc_bf16_instance.cpp | 0 .../device_max_pool3d_fwd_ndhwc_f16_instance.cpp | 0 .../device_max_pool3d_fwd_ndhwc_f32_instance.cpp | 0 .../gpu/pool3d_fwd/pool_fwd_instance_common.hpp | 0 .../gpu/quantization/CMakeLists.txt | 0 .../conv2d_fwd/conv2d_quantization_common.hpp | 0 ...l_bias_perchannel_quantization_int8_instance.cpp | 0 ..._dl_bias_perlayer_quantization_int8_instance.cpp | 0 .../conv2d_fwd/device_conv2d_dl_int8_instance.hpp | 0 ...v2d_dl_perchannel_quantization_int8_instance.cpp | 0 ...onv2d_dl_perlayer_quantization_int8_instance.cpp | 0 ...l_bias_perchannel_quantization_int8_instance.cpp | 0 ...xdl_bias_perlayer_quantization_int8_instance.cpp | 0 .../conv2d_fwd/device_conv2d_xdl_int8_instance.hpp | 0 ...2d_xdl_perchannel_quantization_int8_instance.cpp | 0 ...nv2d_xdl_perlayer_quantization_int8_instance.cpp | 0 ..._quantization_dl_c_shuffle_i8_i8_i8_instance.hpp | 0 ...tion_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp | 0 ...tion_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp | 0 ...tion_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp | 0 ...tion_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp | 0 ...quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp | 0 ...ion_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp | 0 ...ion_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp | 0 ...ion_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp | 0 ...ion_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp | 0 .../quantization/gemm/gemm_quantization_common.hpp | 0 .../gpu/reduce/CMakeLists.txt | 0 ...ce_reduce_instance_blockwise_b16_f32_b16_add.cpp | 0 ...e_reduce_instance_blockwise_b16_f32_b16_amax.cpp | 0 ...ce_reduce_instance_blockwise_b16_f32_b16_avg.cpp | 0 ...ce_reduce_instance_blockwise_b16_f32_b16_max.cpp | 0 ...ce_reduce_instance_blockwise_b16_f32_b16_min.cpp | 0 ..._reduce_instance_blockwise_b16_f32_b16_norm2.cpp | 0 ...e_reduce_instance_blockwise_f16_f16_f16_amax.cpp | 0 ...ce_reduce_instance_blockwise_f16_f16_f16_max.cpp | 0 ...ce_reduce_instance_blockwise_f16_f16_f16_min.cpp | 0 ...ce_reduce_instance_blockwise_f16_f32_f16_add.cpp | 0 ...ce_reduce_instance_blockwise_f16_f32_f16_avg.cpp | 0 ..._reduce_instance_blockwise_f16_f32_f16_norm2.cpp | 0 ...ce_reduce_instance_blockwise_f32_f32_f32_add.cpp | 0 ...e_reduce_instance_blockwise_f32_f32_f32_amax.cpp | 0 ...ce_reduce_instance_blockwise_f32_f32_f32_avg.cpp | 0 ...ce_reduce_instance_blockwise_f32_f32_f32_max.cpp | 0 ...ce_reduce_instance_blockwise_f32_f32_f32_min.cpp | 0 ..._reduce_instance_blockwise_f32_f32_f32_norm2.cpp | 0 ...ce_reduce_instance_blockwise_f32_f64_f32_add.cpp | 0 ...ce_reduce_instance_blockwise_f32_f64_f32_avg.cpp | 0 ..._reduce_instance_blockwise_f32_f64_f32_norm2.cpp | 0 ...ce_reduce_instance_blockwise_f64_f64_f64_add.cpp | 0 ...e_reduce_instance_blockwise_f64_f64_f64_amax.cpp | 0 ...ce_reduce_instance_blockwise_f64_f64_f64_avg.cpp | 0 ...ce_reduce_instance_blockwise_f64_f64_f64_max.cpp | 0 ...ce_reduce_instance_blockwise_f64_f64_f64_min.cpp | 0 ..._reduce_instance_blockwise_f64_f64_f64_norm2.cpp | 0 ...vice_reduce_instance_blockwise_i8_i32_i8_add.cpp | 0 ...vice_reduce_instance_blockwise_i8_i32_i8_avg.cpp | 0 ...vice_reduce_instance_blockwise_i8_i8_i8_amax.cpp | 0 ...evice_reduce_instance_blockwise_i8_i8_i8_max.cpp | 0 ...evice_reduce_instance_blockwise_i8_i8_i8_min.cpp | 0 ...stance_multiblock_atomic_add_b16_f32_f32_add.cpp | 0 ...stance_multiblock_atomic_add_b16_f32_f32_avg.cpp | 0 ...stance_multiblock_atomic_add_f16_f32_f32_add.cpp | 0 ...stance_multiblock_atomic_add_f16_f32_f32_avg.cpp | 0 ...stance_multiblock_atomic_add_f32_f32_f32_add.cpp | 0 ...stance_multiblock_atomic_add_f32_f32_f32_avg.cpp | 0 ...stance_multiblock_atomic_add_f32_f64_f32_add.cpp | 0 ...stance_multiblock_atomic_add_f32_f64_f32_avg.cpp | 0 ...stance_multiblock_atomic_add_f64_f64_f64_add.cpp | 0 ...stance_multiblock_atomic_add_f64_f64_f64_avg.cpp | 0 ...e_reduce_instance_threadwise_b16_f32_b16_add.cpp | 0 ..._reduce_instance_threadwise_b16_f32_b16_amax.cpp | 0 ...e_reduce_instance_threadwise_b16_f32_b16_avg.cpp | 0 ...e_reduce_instance_threadwise_b16_f32_b16_max.cpp | 0 ...e_reduce_instance_threadwise_b16_f32_b16_min.cpp | 0 ...reduce_instance_threadwise_b16_f32_b16_norm2.cpp | 0 ..._reduce_instance_threadwise_f16_f16_f16_amax.cpp | 0 ...e_reduce_instance_threadwise_f16_f16_f16_max.cpp | 0 ...e_reduce_instance_threadwise_f16_f16_f16_min.cpp | 0 ...e_reduce_instance_threadwise_f16_f32_f16_add.cpp | 0 ...e_reduce_instance_threadwise_f16_f32_f16_avg.cpp | 0 ...reduce_instance_threadwise_f16_f32_f16_norm2.cpp | 0 ...e_reduce_instance_threadwise_f32_f32_f32_add.cpp | 0 ..._reduce_instance_threadwise_f32_f32_f32_amax.cpp | 0 ...e_reduce_instance_threadwise_f32_f32_f32_avg.cpp | 0 ...e_reduce_instance_threadwise_f32_f32_f32_max.cpp | 0 ...e_reduce_instance_threadwise_f32_f32_f32_min.cpp | 0 ...reduce_instance_threadwise_f32_f32_f32_norm2.cpp | 0 ...e_reduce_instance_threadwise_f32_f64_f32_add.cpp | 0 ...e_reduce_instance_threadwise_f32_f64_f32_avg.cpp | 0 ...reduce_instance_threadwise_f32_f64_f32_norm2.cpp | 0 ...e_reduce_instance_threadwise_f64_f64_f64_add.cpp | 0 ..._reduce_instance_threadwise_f64_f64_f64_amax.cpp | 0 ...e_reduce_instance_threadwise_f64_f64_f64_avg.cpp | 0 ...e_reduce_instance_threadwise_f64_f64_f64_max.cpp | 0 ...e_reduce_instance_threadwise_f64_f64_f64_min.cpp | 0 ...reduce_instance_threadwise_f64_f64_f64_norm2.cpp | 0 ...ice_reduce_instance_threadwise_i8_i32_i8_add.cpp | 0 ...ice_reduce_instance_threadwise_i8_i32_i8_avg.cpp | 0 ...ice_reduce_instance_threadwise_i8_i8_i8_amax.cpp | 0 ...vice_reduce_instance_threadwise_i8_i8_i8_max.cpp | 0 ...vice_reduce_instance_threadwise_i8_i8_i8_min.cpp | 0 .../gpu/softmax/CMakeLists.txt | 0 ...evice_softmax_f16_f16_instance_rank3_reduce1.cpp | 0 ...evice_softmax_f16_f16_instance_rank3_reduce2.cpp | 0 ...evice_softmax_f16_f16_instance_rank3_reduce3.cpp | 0 ...evice_softmax_f16_f16_instance_rank4_reduce1.cpp | 0 ...evice_softmax_f16_f16_instance_rank4_reduce2.cpp | 0 ...evice_softmax_f16_f16_instance_rank4_reduce3.cpp | 0 ...evice_softmax_f16_f16_instance_rank4_reduce4.cpp | 0 ...evice_softmax_f32_f32_instance_rank3_reduce1.cpp | 0 ...evice_softmax_f32_f32_instance_rank3_reduce2.cpp | 0 ...evice_softmax_f32_f32_instance_rank3_reduce3.cpp | 0 ...evice_softmax_f32_f32_instance_rank4_reduce1.cpp | 0 ...evice_softmax_f32_f32_instance_rank4_reduce2.cpp | 0 ...evice_softmax_f32_f32_instance_rank4_reduce3.cpp | 0 ...evice_softmax_f32_f32_instance_rank4_reduce4.cpp | 0 .../gpu/tall_and_skinny_gemm_splitk/CMakeLists.txt | 0 ...ny_gemm_splitk_f16_f16_f16_mk_kn_mn_instance.cpp | 0 ...ny_gemm_splitk_f16_f16_f16_mk_nk_mn_instance.cpp | 0 .../library => library}/src/utility/CMakeLists.txt | 0 .../src/utility/convolution_parameter.cpp | 0 .../src/utility/device_memory.cpp | 0 .../library => library}/src/utility/host_tensor.cpp | 0 .../profiler => profiler}/CMakeLists.txt | 0 {composable_kernel/profiler => profiler}/README.md | 0 .../include/profiler/data_type_enum.hpp | 0 .../profiler/profile_avg_pool3d_bwd_impl.hpp | 0 .../profile_batched_gemm_add_relu_gemm_add_impl.hpp | 0 ..._batched_gemm_bias_softmax_gemm_permute_impl.hpp | 0 .../profiler/profile_batched_gemm_gemm_impl.hpp | 0 .../include/profiler/profile_batched_gemm_impl.hpp | 0 .../profiler/profile_batched_gemm_reduce_impl.hpp | 0 .../profile_batched_gemm_softmax_gemm_impl.hpp | 0 ...ofile_batched_gemm_softmax_gemm_permute_impl.hpp | 0 .../profiler/profile_batchnorm_backward_impl.hpp | 0 .../profiler/profile_batchnorm_forward_impl.hpp | 0 .../profiler/profile_batchnorm_infer_impl.hpp | 0 .../include/profiler/profile_contraction_impl.hpp | 0 .../include/profiler/profile_contraction_utils.hpp | 0 .../include/profiler/profile_conv_bwd_data_impl.hpp | 0 .../profile_conv_fwd_bias_relu_add_impl.hpp | 0 .../profiler/profile_conv_fwd_bias_relu_impl.hpp | 0 .../include/profiler/profile_conv_fwd_impl.hpp | 0 .../profiler/profile_conv_tensor_rearrange_impl.hpp | 0 .../profiler/profile_elementwise_layernorm_impl.hpp | 0 .../profiler/profile_gemm_add_add_fastgelu_impl.hpp | 0 .../profiler/profile_gemm_add_fastgelu_impl.hpp | 0 .../profiler/profile_gemm_add_multiply_impl.hpp | 0 .../profile_gemm_add_relu_add_layernorm_impl.hpp | 0 .../profiler/profile_gemm_bias_add_reduce_impl.hpp | 0 .../include/profiler/profile_gemm_bilinear_impl.hpp | 0 .../include/profiler/profile_gemm_fastgelu_impl.hpp | 0 .../include/profiler/profile_gemm_impl.hpp | 0 .../profiler/profile_gemm_multiply_add_impl.hpp | 0 .../include/profiler/profile_gemm_reduce_impl.hpp | 0 .../include/profiler/profile_gemm_splitk_impl.hpp | 0 .../include/profiler/profile_gemm_streamk_impl.hpp | 0 .../include/profiler/profile_gemv_splitk_impl.hpp | 0 .../profiler/profile_grouped_conv_bwd_data_impl.hpp | 0 .../profile_grouped_conv_bwd_weight_impl.hpp | 0 .../profiler/profile_grouped_conv_fwd_impl.hpp | 0 .../profiler/profile_grouped_gemm_fastgelu_impl.hpp | 0 .../include/profiler/profile_grouped_gemm_impl.hpp | 0 .../include/profiler/profile_groupnorm_impl.hpp | 0 .../include/profiler/profile_layernorm_impl.hpp | 0 .../profiler/profile_max_pool3d_bwd_impl.hpp | 0 .../include/profiler/profile_pool3d_fwd_impl.hpp | 0 .../include/profiler/profile_reduce_impl.hpp | 0 .../include/profiler/profile_softmax_impl.hpp | 0 .../profile_tall_and_skinny_gemm_splitk_impl.hpp | 0 .../profiler => profiler}/src/CMakeLists.txt | 0 .../src/profile_avg_pool3d_bwd.cpp | 0 .../src/profile_batched_gemm.cpp | 0 .../src/profile_batched_gemm_add_relu_gemm_add.cpp | 0 .../src/profile_batched_gemm_gemm.cpp | 0 .../src/profile_batched_gemm_multi_d.cpp | 0 .../src/profile_batched_gemm_reduce.cpp | 0 .../src/profile_batchnorm_bwd.cpp | 0 .../src/profile_batchnorm_fwd.cpp | 0 .../src/profile_batchnorm_infer.cpp | 0 .../src/profile_contraction_bilinear.cpp | 0 .../src/profile_contraction_scale.cpp | 0 .../src/profile_conv_bwd_data.cpp | 0 .../profiler => profiler}/src/profile_conv_fwd.cpp | 0 .../src/profile_conv_fwd_bias_relu.cpp | 0 .../src/profile_conv_fwd_bias_relu_add.cpp | 0 .../src/profile_conv_tensor_rearrange.cpp | 0 .../profiler => profiler}/src/profile_gemm.cpp | 0 .../src/profile_gemm_add_add_fastgelu.cpp | 0 .../src/profile_gemm_add_fastgelu.cpp | 0 .../src/profile_gemm_add_multiply.cpp | 0 .../src/profile_gemm_add_relu_add_layernorm.cpp | 0 .../src/profile_gemm_bias_add_reduce.cpp | 0 .../src/profile_gemm_bilinear.cpp | 0 .../src/profile_gemm_fastgelu.cpp | 0 .../src/profile_gemm_multiply_add.cpp | 0 .../src/profile_gemm_reduce.cpp | 0 .../src/profile_gemm_splitk.cpp | 0 .../src/profile_gemm_streamk.cpp | 0 .../src/profile_gemv_splitk.cpp | 0 .../src/profile_grouped_conv_bwd_data.cpp | 0 .../src/profile_grouped_conv_bwd_weight.cpp | 0 .../src/profile_grouped_conv_fwd.cpp | 0 .../src/profile_grouped_gemm.cpp | 0 .../src/profile_grouped_gemm_fastgelu.cpp | 0 .../profiler => profiler}/src/profile_groupnorm.cpp | 0 .../profiler => profiler}/src/profile_layernorm.cpp | 0 .../src/profile_max_pool3d_bwd.cpp | 0 .../src/profile_max_pool3d_fwd.cpp | 0 .../profiler => profiler}/src/profile_reduce.cpp | 0 .../profiler => profiler}/src/profile_softmax.cpp | 0 .../src/profile_tall_and_skinny_gemm_splitk.cpp | 0 .../profiler => profiler}/src/profiler.cpp | 0 .../src/profiler_operation_registry.hpp | 0 composable_kernel/rbuild.ini => rbuild.ini | 0 .../requirements.txt => requirements.txt | 0 .../script => script}/check_copyright_year.sh | 0 .../script => script}/clang-format-overwrite.sh | 0 .../script => script}/cmake-ck-dev.sh | 0 .../script => script}/cmake-ck-release.sh | 0 {composable_kernel/script => script}/count_vgpr.sh | 0 .../script => script}/hipclang_opt.sh | 0 .../script => script}/install_precommit.sh | 0 .../script => script}/parse_perf_data.py | 0 .../script => script}/process_perf_data.py | 0 .../script => script}/process_perf_data.sh | 0 .../script => script}/process_qa_data.sh | 0 .../script => script}/profile_batched_gemm.sh | 0 .../script => script}/profile_conv_bwd_data.sh | 0 .../script => script}/profile_conv_fwd.sh | 0 .../script => script}/profile_gemm.sh | 0 .../script => script}/profile_gemm_bilinear.sh | 0 .../script => script}/profile_grouped_gemm.sh | 0 .../script => script}/profile_onnx_gemm.sh | 0 .../script => script}/profile_reduce_no_index.sh | 0 .../script => script}/profile_reduce_with_index.sh | 0 .../script => script}/profile_resnet50.sh | 0 .../script => script}/profile_splitK_gemm.sh | 0 .../script => script}/run_full_performance_tests.sh | 0 .../script => script}/run_performance_tests.sh | 0 .../script => script}/test_convnd_fwd.sh | 0 .../script => script}/test_reduce_no_index.sh | 0 .../script => script}/test_reduce_with_index.sh | 0 .../script => script}/uninstall_precommit.sh | 0 {composable_kernel/test => test}/CMakeLists.txt | 0 .../test => test}/batched_gemm/CMakeLists.txt | 0 .../batched_gemm/batched_gemm_bf16.cpp | 0 .../batched_gemm/batched_gemm_fp16.cpp | 0 .../batched_gemm/batched_gemm_fp32.cpp | 0 .../batched_gemm/batched_gemm_int8.cpp | 0 .../batched_gemm/test_batched_gemm.cpp | 0 .../test => test}/batched_gemm_gemm/CMakeLists.txt | 0 .../test_batched_gemm_gemm_fp16.cpp | 0 .../test_batched_gemm_gemm_util.hpp | 0 .../batched_gemm_multi_d/CMakeLists.txt | 0 .../test_batched_gemm_multi_d_dl.cpp | 0 .../batched_gemm_reduce/CMakeLists.txt | 0 .../batched_gemm_reduce_fp16.cpp | 0 .../batched_gemm_softmax_gemm/CMakeLists.txt | 0 .../test_batched_gemm_softmax_gemm_fp16.cpp | 0 .../test_batched_gemm_softmax_gemm_util.hpp | 0 .../CMakeLists.txt | 0 ..._batched_gemm_bias_softmax_gemm_permute_bf16.cpp | 0 ..._batched_gemm_bias_softmax_gemm_permute_fp16.cpp | 0 ..._batched_gemm_bias_softmax_gemm_permute_util.hpp | 0 .../test_batched_gemm_softmax_gemm_permute_bf16.cpp | 0 .../test_batched_gemm_softmax_gemm_permute_fp16.cpp | 0 .../test_batched_gemm_softmax_gemm_permute_util.hpp | 0 .../test => test}/batchnorm/CMakeLists.txt | 0 .../batchnorm/batchnorm_bwd_rank_4.cpp | 0 .../batchnorm/batchnorm_fwd_rank_4.cpp | 0 .../batchnorm/batchnorm_infer_rank_4.cpp | 0 .../block_swizzle_test/block_swizzle_test.cpp | 0 .../test => test}/block_swizzle_test/rebuild.sh | 0 .../test => test}/block_swizzle_test/simple_args.h | 0 .../test => test}/block_to_ctile_map/CMakeLists.txt | 0 .../block_to_ctile_map/test_block_to_ctile_map.cpp | 0 .../test => test}/contraction/CMakeLists.txt | 0 .../test => test}/contraction/test_contraction.cpp | 0 .../contraction/test_contraction_interface.cpp | 0 .../conv_tensor_rearrange/CMakeLists.txt | 0 .../test_conv_tensor_rearrange.cpp | 0 .../test_conv_tensor_rearrange_interface.cpp | 0 .../test => test}/conv_util/CMakeLists.txt | 0 .../test => test}/conv_util/conv_util.cpp | 0 .../test => test}/convnd_bwd_data/CMakeLists.txt | 0 .../convnd_bwd_data/convnd_bwd_data.cpp | 0 .../test => test}/convnd_fwd/CMakeLists.txt | 0 .../test => test}/convnd_fwd/convnd_fwd.cpp | 0 .../test => test}/data_type/CMakeLists.txt | 0 .../test => test}/data_type/test_bf8.cpp | 0 .../test => test}/data_type/test_fp8.cpp | 0 .../test => test}/data_type/test_int4.cpp | 0 .../test => test}/data_type/type_convert_const.cpp | 0 .../elementwise_normalization/CMakeLists.txt | 0 .../test_elementwise_layernorm_fp16.cpp | 0 .../test => test}/gemm/CMakeLists.txt | 0 {composable_kernel/test => test}/gemm/gemm_bf16.cpp | 0 {composable_kernel/test => test}/gemm/gemm_fp16.cpp | 0 {composable_kernel/test => test}/gemm/gemm_fp32.cpp | 0 {composable_kernel/test => test}/gemm/gemm_fp64.cpp | 0 {composable_kernel/test => test}/gemm/gemm_int8.cpp | 0 .../test => test}/gemm/gemm_standalone_xdl_fp16.cpp | 0 {composable_kernel/test => test}/gemm/gemm_util.hpp | 0 .../gemm/instance/gemm_f16_nn_instance.cpp | 0 .../gemm/instance/gemm_f16_nn_instance.hpp | 0 .../gemm/instance/gemm_f16_nt_instance.cpp | 0 .../gemm/instance/gemm_f16_nt_instance.hpp | 0 .../gemm/instance/gemm_f16_tn_instance.cpp | 0 .../gemm/instance/gemm_f16_tn_instance.hpp | 0 .../gemm/instance/gemm_f16_tt_instance.cpp | 0 .../gemm/instance/gemm_f16_tt_instance.hpp | 0 .../gemm/instance/gemm_wavelet_f16_tn_instance.cpp | 0 .../gemm/instance/gemm_wavelet_f16_tn_instance.hpp | 0 .../test => test}/gemm/run_gemm_test.inc | 0 .../test => test}/gemm_layernorm/CMakeLists.txt | 0 .../test_gemm_add_relu_add_layernorm_fp16.cpp | 0 .../test => test}/gemm_reduce/CMakeLists.txt | 0 .../test => test}/gemm_reduce/gemm_reduce_fp16.cpp | 0 .../test => test}/gemm_split_k/CMakeLists.txt | 0 .../test => test}/gemm_split_k/test_gemm_splitk.cpp | 0 .../gemm_split_k/test_gemm_splitk_ut_cases.inc | 0 .../gemm_split_k/test_gemm_splitk_util.hpp | 0 .../grouped_convnd_bwd_data/CMakeLists.txt | 0 .../test_grouped_convnd_bwd_data.cpp | 0 .../test_grouped_convnd_bwd_data_interface_wmma.cpp | 0 .../test_grouped_convnd_bwd_data_interface_xdl.cpp | 0 .../grouped_convnd_bwd_weight/CMakeLists.txt | 0 .../test_grouped_convnd_bwd_weight.cpp | 0 ...est_grouped_convnd_bwd_weight_interface_wmma.cpp | 0 ...test_grouped_convnd_bwd_weight_interface_xdl.cpp | 0 .../test => test}/grouped_convnd_fwd/CMakeLists.txt | 0 .../grouped_convnd_fwd/grouped_convnd_fwd.cpp | 0 .../grouped_convnd_fwd/test_grouped_convnd_fwd.cpp | 0 .../test => test}/grouped_gemm/CMakeLists.txt | 0 .../grouped_gemm/test_grouped_gemm_interface.cpp | 0 .../grouped_gemm/test_grouped_gemm_splitk.cpp | 0 .../grouped_gemm/test_grouped_gemm_ut_cases.inc | 0 .../grouped_gemm/test_grouped_gemm_util.hpp | 0 .../test => test}/image_to_column/CMakeLists.txt | 0 .../image_to_column/test_image_to_column.cpp | 0 .../test_image_to_column_interface.cpp | 0 .../magic_number_division/CMakeLists.txt | 0 .../magic_number_division/magic_number_division.cpp | 0 .../test => test}/normalization/CMakeLists.txt | 0 .../normalization/test_groupnorm_fp16.cpp | 0 .../normalization/test_groupnorm_fp32.cpp | 0 .../normalization/test_layernorm2d_fp16.cpp | 0 .../normalization/test_layernorm2d_fp32.cpp | 0 .../test => test}/pool/CMakeLists.txt | 0 .../test => test}/pool/test_avg_pool3d_bwd.cpp | 0 .../test => test}/pool/test_avg_pool3d_fwd.cpp | 0 .../test => test}/pool/test_max_pool3d_bwd.cpp | 0 .../test => test}/pool/test_max_pool3d_fwd.cpp | 0 .../test => test}/pool/test_pool_fwd_common.hpp | 0 .../test => test}/reduce/CMakeLists.txt | 0 .../test => test}/reduce/reduce_no_index.cpp | 0 .../test => test}/reduce/reduce_with_index.cpp | 0 .../test => test}/reference_conv_fwd/CMakeLists.txt | 0 .../reference_conv_fwd/reference_conv_fwd.cpp | 0 .../test => test}/softmax/CMakeLists.txt | 0 .../softmax/test_softmax_interface.cpp | 0 .../test => test}/softmax/test_softmax_rank3.cpp | 0 .../test => test}/softmax/test_softmax_rank4.cpp | 0 .../test => test}/softmax/test_softmax_ut_cases.inc | 0 .../test => test}/softmax/test_softmax_util.hpp | 0 .../space_filling_curve/CMakeLists.txt | 0 .../space_filling_curve/space_filling_curve.cpp | 0 .../test => test}/wmma_op/CMakeLists.txt | 0 .../test => test}/wmma_op/wmma_op.cpp | 0 .../test => test}/wmma_op/wmma_op_util.hpp | 0 1828 files changed, 0 insertions(+), 0 deletions(-) rename composable_kernel/CHANGELOG.md => CHANGELOG.md (100%) rename composable_kernel/CITATION.cff => CITATION.cff (100%) rename composable_kernel/CMakeLists.txt => CMakeLists.txt (100%) rename composable_kernel/CONTRIBUTORS.md => CONTRIBUTORS.md (100%) rename composable_kernel/Config.cmake.in => Config.cmake.in (100%) rename composable_kernel/Dockerfile => Dockerfile (100%) rename composable_kernel/Jenkinsfile => Jenkinsfile (100%) rename composable_kernel/LICENSE => LICENSE (100%) rename composable_kernel/README.md => README.md (100%) rename {composable_kernel/client_example => client_example}/01_gemm/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/01_gemm/gemm.cpp (100%) rename {composable_kernel/client_example => client_example}/02_gemm_add_add_fastgelu/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp (100%) rename {composable_kernel/client_example => client_example}/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp (100%) rename {composable_kernel/client_example => client_example}/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp (100%) rename {composable_kernel/client_example => client_example}/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp (100%) rename {composable_kernel/client_example => client_example}/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp (100%) rename {composable_kernel/client_example => client_example}/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp (100%) rename {composable_kernel/client_example => client_example}/03_gemm_layernorm/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp (100%) rename {composable_kernel/client_example => client_example}/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp (100%) rename {composable_kernel/client_example => client_example}/04_contraction/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/04_contraction/contraction_bilinear_fp32.cpp (100%) rename {composable_kernel/client_example => client_example}/04_contraction/contraction_bilinear_fp64.cpp (100%) rename {composable_kernel/client_example => client_example}/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp (100%) rename {composable_kernel/client_example => client_example}/04_contraction/contraction_scale_fp32.cpp (100%) rename {composable_kernel/client_example => client_example}/04_contraction/contraction_scale_fp64.cpp (100%) rename {composable_kernel/client_example => client_example}/05_layernorm/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/05_layernorm/layernorm2d.cpp (100%) rename {composable_kernel/client_example => client_example}/06_softmax/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/06_softmax/softmax4d.cpp (100%) rename {composable_kernel/client_example => client_example}/07_grouped_convnd_fwd/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp (100%) rename {composable_kernel/client_example => client_example}/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp (100%) rename {composable_kernel/client_example => client_example}/08_fused_attention/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/08_fused_attention/fused_attention.cpp (100%) rename {composable_kernel/client_example => client_example}/08_fused_attention/fused_attention_bias.cpp (100%) rename {composable_kernel/client_example => client_example}/09_quantization/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp (100%) rename {composable_kernel/client_example => client_example}/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp (100%) rename {composable_kernel/client_example => client_example}/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp (100%) rename {composable_kernel/client_example => client_example}/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp (100%) rename {composable_kernel/client_example => client_example}/09_quantization/conv2d_fwd_perchannel_quantization.cpp (100%) rename {composable_kernel/client_example => client_example}/09_quantization/conv2d_fwd_perlayer_quantization.cpp (100%) rename {composable_kernel/client_example => client_example}/09_quantization/gemm_quantization.cpp (100%) rename {composable_kernel/client_example => client_example}/10_grouped_convnd_bwd_data/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp (100%) rename {composable_kernel/client_example => client_example}/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp (100%) rename {composable_kernel/client_example => client_example}/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp (100%) rename {composable_kernel/client_example => client_example}/11_grouped_conv_bwd_weight/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/11_grouped_conv_bwd_weight/common.hpp (100%) rename {composable_kernel/client_example => client_example}/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp (100%) rename {composable_kernel/client_example => client_example}/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp (100%) rename {composable_kernel/client_example => client_example}/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp (100%) rename {composable_kernel/client_example => client_example}/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp (100%) rename {composable_kernel/client_example => client_example}/12_elementwise_normalization/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/12_elementwise_normalization/elementwise_layernorm2d.cpp (100%) rename {composable_kernel/client_example => client_example}/13_batchnorm/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/13_batchnorm/batchnorm_bwd_nhwc.cpp (100%) rename {composable_kernel/client_example => client_example}/13_batchnorm/batchnorm_fwd_nhwc.cpp (100%) rename {composable_kernel/client_example => client_example}/13_batchnorm/batchnorm_infer_nhwc.cpp (100%) rename {composable_kernel/client_example => client_example}/14_instance_id/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/14_instance_id/batchnorm_fwd_instance_id.cpp (100%) rename {composable_kernel/client_example => client_example}/15_convnd_bwd_data/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/15_convnd_bwd_data/common.hpp (100%) rename {composable_kernel/client_example => client_example}/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp (100%) rename {composable_kernel/client_example => client_example}/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp (100%) rename {composable_kernel/client_example => client_example}/15_gemm_add_multiply/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/15_gemm_add_multiply/gemm_add_multiply.cpp (100%) rename {composable_kernel/client_example => client_example}/15_reduce/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/15_reduce/reduce_nhwc_c.cpp (100%) rename {composable_kernel/client_example => client_example}/16_convnd_fwd/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/16_convnd_fwd/common.hpp (100%) rename {composable_kernel/client_example => client_example}/16_convnd_fwd/conv3d_fwd_fp16.cpp (100%) rename {composable_kernel/client_example => client_example}/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp (100%) rename {composable_kernel/client_example => client_example}/16_convnd_fwd/conv3d_fwd_fp32.cpp (100%) rename {composable_kernel/client_example => client_example}/17_grouped_gemm_fastgelu/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp (100%) rename {composable_kernel/client_example => client_example}/18_groupnorm/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/18_groupnorm/groupnorm_swish.cpp (100%) rename {composable_kernel/client_example => client_example}/19_pool/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/19_pool/avg_pool3d_bwd.cpp (100%) rename {composable_kernel/client_example => client_example}/19_pool/avg_pool3d_fwd.cpp (100%) rename {composable_kernel/client_example => client_example}/19_pool/max_pool2d_bwd.cpp (100%) rename {composable_kernel/client_example => client_example}/19_pool/max_pool2d_fwd.cpp (100%) rename {composable_kernel/client_example => client_example}/20_splitk_gemm/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/20_splitk_gemm/splitK_gemm_fp16_f8.cpp (100%) rename {composable_kernel/client_example => client_example}/21_grouped_gemm_bias/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp (100%) rename {composable_kernel/client_example => client_example}/22_grouped_gemm/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp (100%) rename {composable_kernel/client_example => client_example}/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp (100%) rename {composable_kernel/client_example => client_example}/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp (100%) rename {composable_kernel/client_example => client_example}/22_im2col_col2im/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/22_im2col_col2im/column_to_image.cpp (100%) rename {composable_kernel/client_example => client_example}/22_im2col_col2im/image_to_column.cpp (100%) rename {composable_kernel/client_example => client_example}/CMakeLists.txt (100%) rename {composable_kernel/client_example => client_example}/README.md (100%) rename {composable_kernel/cmake => cmake}/Analyzers.cmake (100%) rename {composable_kernel/cmake => cmake}/ClangTidy.cmake (100%) rename {composable_kernel/cmake => cmake}/CppCheck.cmake (100%) rename {composable_kernel/cmake => cmake}/DoxygenDoc.cmake (100%) rename {composable_kernel/cmake => cmake}/EnableCompilerWarnings.cmake (100%) rename {composable_kernel/cmake => cmake}/TargetFlags.cmake (100%) rename {composable_kernel/cmake => cmake}/googletest.cmake (100%) rename composable_kernel/dev-requirements.txt => dev-requirements.txt (100%) rename {composable_kernel/docs => docs}/API_Reference_Guide.rst (100%) rename {composable_kernel/docs => docs}/Contributors_Guide.rst (100%) rename {composable_kernel/docs => docs}/Supported_Primitives_Guide.rst (100%) rename {composable_kernel/docs => docs}/conf.py (100%) rename {composable_kernel/docs => docs}/data/ck_component.png (100%) rename {composable_kernel/docs => docs}/data/ck_layer.png (100%) rename {composable_kernel/docs => docs}/dockerhub.rst (100%) rename {composable_kernel/docs => docs}/doxygen/Doxyfile (100%) rename {composable_kernel/docs => docs}/index.rst (100%) rename {composable_kernel/docs => docs}/license.rst (100%) rename {composable_kernel/docs => docs}/refs.bib (100%) rename {composable_kernel/docs => docs}/sphinx/_toc.yml.in (100%) rename {composable_kernel/docs => docs}/sphinx/requirements.in (100%) rename {composable_kernel/docs => docs}/sphinx/requirements.txt (100%) rename {composable_kernel/docs => docs}/tutorial_hello_world.rst (100%) rename {composable_kernel/example => example}/01_gemm/CMakeLists.txt (100%) rename {composable_kernel/example => example}/01_gemm/README.md (100%) rename {composable_kernel/example => example}/01_gemm/common.hpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_dl_fp16.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_dl_fp32.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_dl_int4.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_dl_int8.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_dpp_fp16.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_wmma_fp16.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_bf16_rtn.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_fp16_fp8.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_fp64.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_fp8.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_fp8_bf8.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_int4.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_streamk.cpp (100%) rename {composable_kernel/example => example}/01_gemm/gemm_xdl_wavelet_fp16.cpp (100%) rename {composable_kernel/example => example}/01_gemm/run_gemm_example.inc (100%) rename {composable_kernel/example => example}/02_gemm_bilinear/CMakeLists.txt (100%) rename {composable_kernel/example => example}/02_gemm_bilinear/README.md (100%) rename {composable_kernel/example => example}/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp (100%) rename {composable_kernel/example => example}/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp (100%) rename {composable_kernel/example => example}/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/03_gemm_bias_relu/CMakeLists.txt (100%) rename {composable_kernel/example => example}/03_gemm_bias_relu/README.md (100%) rename {composable_kernel/example => example}/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/04_gemm_add_add_fastgelu/CMakeLists.txt (100%) rename {composable_kernel/example => example}/04_gemm_add_add_fastgelu/README.md (100%) rename {composable_kernel/example => example}/04_gemm_add_add_fastgelu/common.hpp (100%) rename {composable_kernel/example => example}/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp (100%) rename {composable_kernel/example => example}/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc (100%) rename {composable_kernel/example => example}/09_convnd_fwd/CMakeLists.txt (100%) rename {composable_kernel/example => example}/09_convnd_fwd/README.md (100%) rename {composable_kernel/example => example}/09_convnd_fwd/convnd_fwd_common.hpp (100%) rename {composable_kernel/example => example}/09_convnd_fwd/convnd_fwd_dl_common.hpp (100%) rename {composable_kernel/example => example}/09_convnd_fwd/convnd_fwd_dl_fp16.cpp (100%) rename {composable_kernel/example => example}/09_convnd_fwd/convnd_fwd_dl_fp32.cpp (100%) rename {composable_kernel/example => example}/09_convnd_fwd/convnd_fwd_dl_int8.cpp (100%) rename {composable_kernel/example => example}/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp (100%) rename {composable_kernel/example => example}/09_convnd_fwd/convnd_fwd_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/09_convnd_fwd/run_convnd_fwd_dl_example.inc (100%) rename {composable_kernel/example => example}/09_convnd_fwd/run_convnd_fwd_example.inc (100%) rename {composable_kernel/example => example}/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt (100%) rename {composable_kernel/example => example}/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp (100%) rename {composable_kernel/example => example}/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp (100%) rename {composable_kernel/example => example}/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc (100%) rename {composable_kernel/example => example}/12_reduce/CMakeLists.txt (100%) rename {composable_kernel/example => example}/12_reduce/README.md (100%) rename {composable_kernel/example => example}/12_reduce/reduce_blockwise.cpp (100%) rename {composable_kernel/example => example}/12_reduce/reduce_blockwise_impl.hpp (100%) rename {composable_kernel/example => example}/12_reduce/reduce_blockwise_two_call.cpp (100%) rename {composable_kernel/example => example}/12_reduce/reduce_example_common.hpp (100%) rename {composable_kernel/example => example}/12_reduce/reduce_multiblock_atomic_add.cpp (100%) rename {composable_kernel/example => example}/12_reduce/reduce_multiblock_atomic_add_impl.hpp (100%) rename {composable_kernel/example => example}/13_pool2d_fwd/CMakeLists.txt (100%) rename {composable_kernel/example => example}/13_pool2d_fwd/README.md (100%) rename {composable_kernel/example => example}/13_pool2d_fwd/pool2d_fwd_common.hpp (100%) rename {composable_kernel/example => example}/13_pool2d_fwd/pool2d_fwd_fp16.cpp (100%) rename {composable_kernel/example => example}/13_pool2d_fwd/pool2d_fwd_fp32.cpp (100%) rename {composable_kernel/example => example}/14_gemm_quantization/CMakeLists.txt (100%) rename {composable_kernel/example => example}/14_gemm_quantization/gemm_dl_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/14_gemm_quantization/gemm_xdl_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/15_grouped_gemm/CMakeLists.txt (100%) rename {composable_kernel/example => example}/15_grouped_gemm/README.md (100%) rename {composable_kernel/example => example}/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp (100%) rename {composable_kernel/example => example}/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp (100%) rename {composable_kernel/example => example}/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp (100%) rename {composable_kernel/example => example}/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp (100%) rename {composable_kernel/example => example}/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/15_grouped_gemm/grouped_gemm_xdl_int4.cpp (100%) rename {composable_kernel/example => example}/15_grouped_gemm/grouped_gemm_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp (100%) rename {composable_kernel/example => example}/15_grouped_gemm/run_grouped_gemm_example.inc (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/CMakeLists.txt (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp (100%) rename {composable_kernel/example => example}/17_convnd_bwd_data/CMakeLists.txt (100%) rename {composable_kernel/example => example}/17_convnd_bwd_data/README.md (100%) rename {composable_kernel/example => example}/17_convnd_bwd_data/convnd_bwd_data_common.hpp (100%) rename {composable_kernel/example => example}/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp (100%) rename {composable_kernel/example => example}/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/18_batched_gemm_reduce/CMakeLists.txt (100%) rename {composable_kernel/example => example}/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/19_binary_elementwise/CMakeLists.txt (100%) rename {composable_kernel/example => example}/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp (100%) rename {composable_kernel/example => example}/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp (100%) rename {composable_kernel/example => example}/19_binary_elementwise/elementwise_add_1d.cpp (100%) rename {composable_kernel/example => example}/19_binary_elementwise/elementwise_add_4d.cpp (100%) rename {composable_kernel/example => example}/20_grouped_conv_bwd_weight/CMakeLists.txt (100%) rename {composable_kernel/example => example}/20_grouped_conv_bwd_weight/common.hpp (100%) rename {composable_kernel/example => example}/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp (100%) rename {composable_kernel/example => example}/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp (100%) rename {composable_kernel/example => example}/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp (100%) rename {composable_kernel/example => example}/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc (100%) rename {composable_kernel/example => example}/21_gemm_layernorm/CMakeLists.txt (100%) rename {composable_kernel/example => example}/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp (100%) rename {composable_kernel/example => example}/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp (100%) rename {composable_kernel/example => example}/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp (100%) rename {composable_kernel/example => example}/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp (100%) rename {composable_kernel/example => example}/22_cgemm/CMakeLists.txt (100%) rename {composable_kernel/example => example}/22_cgemm/cgemm_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/22_cgemm/cgemm_xdl_common.hpp (100%) rename {composable_kernel/example => example}/22_cgemm/cgemm_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/22_cgemm/cgemm_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/22_cgemm/cgemm_xdl_int4.cpp (100%) rename {composable_kernel/example => example}/22_cgemm/cgemm_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/23_softmax/CMakeLists.txt (100%) rename {composable_kernel/example => example}/23_softmax/README.md (100%) rename {composable_kernel/example => example}/23_softmax/softmax_blockwise.cpp (100%) rename {composable_kernel/example => example}/24_batched_gemm/CMakeLists.txt (100%) rename {composable_kernel/example => example}/24_batched_gemm/batched_gemm_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/24_batched_gemm/batched_gemm_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/24_batched_gemm/batched_gemm_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/24_batched_gemm/batched_gemm_xdl_int4.cpp (100%) rename {composable_kernel/example => example}/24_batched_gemm/batched_gemm_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/24_batched_gemm/run_batched_gemm_example.inc (100%) rename {composable_kernel/example => example}/25_gemm_bias_e_permute/CMakeLists.txt (100%) rename {composable_kernel/example => example}/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/26_contraction/CMakeLists.txt (100%) rename {composable_kernel/example => example}/26_contraction/README.md (100%) rename {composable_kernel/example => example}/26_contraction/contraction_bilinear_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/26_contraction/contraction_bilinear_xdl_fp64.cpp (100%) rename {composable_kernel/example => example}/26_contraction/contraction_scale_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/26_contraction/contraction_scale_xdl_fp64.cpp (100%) rename {composable_kernel/example => example}/27_layernorm/CMakeLists.txt (100%) rename {composable_kernel/example => example}/27_layernorm/common.hpp (100%) rename {composable_kernel/example => example}/27_layernorm/layernorm_fp16.cpp (100%) rename {composable_kernel/example => example}/27_layernorm/layernorm_splitk_fp16.cpp (100%) rename {composable_kernel/example => example}/27_layernorm/run_layernorm_example.inc (100%) rename {composable_kernel/example => example}/28_grouped_gemm_bias_e_permute/CMakeLists.txt (100%) rename {composable_kernel/example => example}/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/29_batched_gemm_bias_e_permute/CMakeLists.txt (100%) rename {composable_kernel/example => example}/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp (100%) rename {composable_kernel/example => example}/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/CMakeLists.txt (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/README.md (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/common.hpp (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/common_wmma.hpp (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc (100%) rename {composable_kernel/example => example}/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc (100%) rename {composable_kernel/example => example}/31_batched_gemm_gemm/CMakeLists.txt (100%) rename {composable_kernel/example => example}/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp (100%) rename {composable_kernel/example => example}/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc (100%) rename {composable_kernel/example => example}/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt (100%) rename {composable_kernel/example => example}/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc (100%) rename {composable_kernel/example => example}/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc (100%) rename {composable_kernel/example => example}/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc (100%) rename {composable_kernel/example => example}/33_multiple_reduce/CMakeLists.txt (100%) rename {composable_kernel/example => example}/33_multiple_reduce/README.md (100%) rename {composable_kernel/example => example}/33_multiple_reduce/dual_reduce_common.hpp (100%) rename {composable_kernel/example => example}/33_multiple_reduce/dual_reduce_multiblock.cpp (100%) rename {composable_kernel/example => example}/33_multiple_reduce/dual_reduce_threadwise.cpp (100%) rename {composable_kernel/example => example}/34_batchnorm/CMakeLists.txt (100%) rename {composable_kernel/example => example}/34_batchnorm/README.md (100%) rename {composable_kernel/example => example}/34_batchnorm/batchnorm_backward_nhwc.cpp (100%) rename {composable_kernel/example => example}/34_batchnorm/batchnorm_common.hpp (100%) rename {composable_kernel/example => example}/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp (100%) rename {composable_kernel/example => example}/34_batchnorm/batchnorm_forward_training_nhwc.cpp (100%) rename {composable_kernel/example => example}/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp (100%) rename {composable_kernel/example => example}/34_batchnorm/batchnorm_infer_impl.hpp (100%) rename {composable_kernel/example => example}/35_splitK_gemm/CMakeLists.txt (100%) rename {composable_kernel/example => example}/35_splitK_gemm/run_splitK_gemm_example.inc (100%) rename {composable_kernel/example => example}/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/35_splitK_gemm/splitK_gemm_xdl_int4.cpp (100%) rename {composable_kernel/example => example}/35_splitK_gemm/splitK_gemm_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/36_sparse_embedding/CMakeLists.txt (100%) rename {composable_kernel/example => example}/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp (100%) rename {composable_kernel/example => example}/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt (100%) rename {composable_kernel/example => example}/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt (100%) rename {composable_kernel/example => example}/38_grouped_conv_bwd_data_multiple_d/common.hpp (100%) rename {composable_kernel/example => example}/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp (100%) rename {composable_kernel/example => example}/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc (100%) rename {composable_kernel/example => example}/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc (100%) rename {composable_kernel/example => example}/39_permute/CMakeLists.txt (100%) rename {composable_kernel/example => example}/39_permute/common.hpp (100%) rename {composable_kernel/example => example}/39_permute/permute_1xHxW_fp16.cpp (100%) rename {composable_kernel/example => example}/39_permute/permute_HxWx4_fp16.cpp (100%) rename {composable_kernel/example => example}/39_permute/permute_NxHxW_fp16.cpp (100%) rename {composable_kernel/example => example}/39_permute/run_permute_bundle_example.inc (100%) rename {composable_kernel/example => example}/39_permute/run_permute_element_example.inc (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/CMakeLists.txt (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/common.hpp (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc (100%) rename {composable_kernel/example => example}/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc (100%) rename {composable_kernel/example => example}/41_grouped_conv_conv_fwd/CMakeLists.txt (100%) rename {composable_kernel/example => example}/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp (100%) rename {composable_kernel/example => example}/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp (100%) rename {composable_kernel/example => example}/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp (100%) rename {composable_kernel/example => example}/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc (100%) rename {composable_kernel/example => example}/42_groupnorm/CMakeLists.txt (100%) rename {composable_kernel/example => example}/42_groupnorm/common.hpp (100%) rename {composable_kernel/example => example}/42_groupnorm/groupnorm_sigmoid_mul_fp16.cpp (100%) rename {composable_kernel/example => example}/42_groupnorm/groupnorm_splitk_fp16.cpp (100%) rename {composable_kernel/example => example}/42_groupnorm/groupnorm_swish_fp16.cpp (100%) rename {composable_kernel/example => example}/42_groupnorm/run_groupnorm_example.inc (100%) rename {composable_kernel/example => example}/43_splitk_gemm_bias_e_permute/CMakeLists.txt (100%) rename {composable_kernel/example => example}/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp (100%) rename {composable_kernel/example => example}/44_elementwise_permute/CMakeLists.txt (100%) rename {composable_kernel/example => example}/44_elementwise_permute/elementwise_permute_4D_fp16.cpp (100%) rename {composable_kernel/example => example}/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp (100%) rename {composable_kernel/example => example}/45_elementwise_normalization/CMakeLists.txt (100%) rename {composable_kernel/example => example}/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp (100%) rename {composable_kernel/example => example}/46_gemm_add_multiply/CMakeLists.txt (100%) rename {composable_kernel/example => example}/46_gemm_add_multiply/README.md (100%) rename {composable_kernel/example => example}/46_gemm_add_multiply/common.hpp (100%) rename {composable_kernel/example => example}/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp (100%) rename {composable_kernel/example => example}/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/46_gemm_add_multiply/run_gemm_add_multiply_example.inc (100%) rename {composable_kernel/example => example}/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt (100%) rename {composable_kernel/example => example}/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp (100%) rename {composable_kernel/example => example}/48_pool3d_fwd/CMakeLists.txt (100%) rename {composable_kernel/example => example}/48_pool3d_fwd/pool3d_fwd_common.hpp (100%) rename {composable_kernel/example => example}/48_pool3d_fwd/pool3d_fwd_fp16.cpp (100%) rename {composable_kernel/example => example}/49_maxpool2d_bwd/CMakeLists.txt (100%) rename {composable_kernel/example => example}/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp (100%) rename {composable_kernel/example => example}/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp (100%) rename {composable_kernel/example => example}/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp (100%) rename {composable_kernel/example => example}/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp (100%) rename {composable_kernel/example => example}/50_put_element/CMakeLists.txt (100%) rename {composable_kernel/example => example}/50_put_element/put_element_fp16.cpp (100%) rename {composable_kernel/example => example}/51_avgpool3d_bwd/CMakeLists.txt (100%) rename {composable_kernel/example => example}/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp (100%) rename {composable_kernel/example => example}/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp (100%) rename {composable_kernel/example => example}/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp (100%) rename {composable_kernel/example => example}/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp (100%) rename {composable_kernel/example => example}/52_im2col_col2im/CMakeLists.txt (100%) rename {composable_kernel/example => example}/52_im2col_col2im/column_to_image_f32.cpp (100%) rename {composable_kernel/example => example}/52_im2col_col2im/common.hpp (100%) rename {composable_kernel/example => example}/52_im2col_col2im/image_to_column_f32.cpp (100%) rename {composable_kernel/example => example}/53_gemv_splitk/CMakeLists.txt (100%) rename {composable_kernel/example => example}/53_gemv_splitk/README.md (100%) rename {composable_kernel/example => example}/53_gemv_splitk/common.hpp (100%) rename {composable_kernel/example => example}/53_gemv_splitk/gemv_splitk_fp16.cpp (100%) rename {composable_kernel/example => example}/53_gemv_splitk/run_gemv_splitk_example.inc (100%) rename {composable_kernel/example => example}/54_tall_and_skinny_gemm_splitk/CMakeLists.txt (100%) rename {composable_kernel/example => example}/54_tall_and_skinny_gemm_splitk/README.md (100%) rename {composable_kernel/example => example}/54_tall_and_skinny_gemm_splitk/common.hpp (100%) rename {composable_kernel/example => example}/54_tall_and_skinny_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc (100%) rename {composable_kernel/example => example}/54_tall_and_skinny_gemm_splitk/tall_and_skinny_gemm_splitk_fp16.cpp (100%) rename {composable_kernel/example => example}/60_gemm_multi_ABD/CMakeLists.txt (100%) rename {composable_kernel/example => example}/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/61_contraction_multi_ABD/CMakeLists.txt (100%) rename {composable_kernel/example => example}/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/CMakeLists.txt (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/convnd_fwd_activ_common.hpp (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/convnd_fwd_xdl_abs_fp16.cpp (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/convnd_fwd_xdl_clippedrelu_fp16.cpp (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/convnd_fwd_xdl_elu_fp16.cpp (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/convnd_fwd_xdl_leakyrelu_fp16.cpp (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/convnd_fwd_xdl_pow_fp16.cpp (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp (100%) rename {composable_kernel/example => example}/62_conv_fwd_activ/run_convnd_fwd_activ_example.inc (100%) rename {composable_kernel/example => example}/CMakeLists.txt (100%) rename {composable_kernel/include => include}/ck/ck.hpp (100%) rename {composable_kernel/include => include}/ck/config.h.in (100%) rename {composable_kernel/include => include}/ck/host_utility/device_prop.hpp (100%) rename {composable_kernel/include => include}/ck/host_utility/hip_check_error.hpp (100%) rename {composable_kernel/include => include}/ck/host_utility/io.hpp (100%) rename {composable_kernel/include => include}/ck/host_utility/kernel_launch.hpp (100%) rename {composable_kernel/include => include}/ck/host_utility/stream_utility.hpp (100%) rename {composable_kernel/include => include}/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp (100%) rename {composable_kernel/include => include}/ck/stream_config.hpp (100%) rename {composable_kernel/include => include}/ck/tensor/static_tensor.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_description/cluster_descriptor.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_description/multi_index_transform.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_description/multi_index_transform_helper.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_description/tensor_adaptor.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_description/tensor_descriptor.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_description/tensor_descriptor_helper.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_description/tensor_space_filling_curve.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/blockwise_softmax.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/blockwise_welford.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_base.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_batched_gemm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_cgemm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_conv_fwd.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_elementwise.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_gemm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_normalization.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_permute.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_pool_fwd.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_put_element.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_reduce.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_softmax.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/device_tall_and_skinny_gemm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/gemm_specialization.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/masking_specialization.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/matrix_padder.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/tensor_layout.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/tensor_specialization.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/device/welford_helper.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/element/element_wise_operation.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/element/quantization_operation.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_permute.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/thread/threadwise_welford.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/warp/dpp_gemm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/warp/wmma_gemm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp (100%) rename {composable_kernel/include => include}/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp (100%) rename {composable_kernel/include => include}/ck/utility/amd_address_space.hpp (100%) rename {composable_kernel/include => include}/ck/utility/amd_buffer_addressing.hpp (100%) rename {composable_kernel/include => include}/ck/utility/amd_gemm_dpp.hpp (100%) rename {composable_kernel/include => include}/ck/utility/amd_inline_asm.hpp (100%) rename {composable_kernel/include => include}/ck/utility/amd_wave_read_first_lane.hpp (100%) rename {composable_kernel/include => include}/ck/utility/amd_wmma.hpp (100%) rename {composable_kernel/include => include}/ck/utility/amd_xdlops.hpp (100%) rename {composable_kernel/include => include}/ck/utility/array.hpp (100%) rename {composable_kernel/include => include}/ck/utility/array_multi_index.hpp (100%) rename {composable_kernel/include => include}/ck/utility/c_style_pointer_cast.hpp (100%) rename {composable_kernel/include => include}/ck/utility/common_header.hpp (100%) rename {composable_kernel/include => include}/ck/utility/container_element_picker.hpp (100%) rename {composable_kernel/include => include}/ck/utility/container_helper.hpp (100%) rename {composable_kernel/include => include}/ck/utility/data_type.hpp (100%) rename {composable_kernel/include => include}/ck/utility/debug.hpp (100%) rename {composable_kernel/include => include}/ck/utility/dynamic_buffer.hpp (100%) rename {composable_kernel/include => include}/ck/utility/enable_if.hpp (100%) rename {composable_kernel/include => include}/ck/utility/f8_utils.hpp (100%) rename {composable_kernel/include => include}/ck/utility/functional.hpp (100%) rename {composable_kernel/include => include}/ck/utility/functional2.hpp (100%) rename {composable_kernel/include => include}/ck/utility/functional3.hpp (100%) rename {composable_kernel/include => include}/ck/utility/functional4.hpp (100%) rename {composable_kernel/include => include}/ck/utility/generic_memory_space_atomic.hpp (100%) rename {composable_kernel/include => include}/ck/utility/get_id.hpp (100%) rename {composable_kernel/include => include}/ck/utility/get_shift.hpp (100%) rename {composable_kernel/include => include}/ck/utility/ignore.hpp (100%) rename {composable_kernel/include => include}/ck/utility/inner_product.hpp (100%) rename {composable_kernel/include => include}/ck/utility/inner_product_dpp8.hpp (100%) rename {composable_kernel/include => include}/ck/utility/integral_constant.hpp (100%) rename {composable_kernel/include => include}/ck/utility/is_detected.hpp (100%) rename {composable_kernel/include => include}/ck/utility/is_known_at_compile_time.hpp (100%) rename {composable_kernel/include => include}/ck/utility/loop_scheduler.hpp (100%) rename {composable_kernel/include => include}/ck/utility/magic_division.hpp (100%) rename {composable_kernel/include => include}/ck/utility/math.hpp (100%) rename {composable_kernel/include => include}/ck/utility/math_v2.hpp (100%) rename {composable_kernel/include => include}/ck/utility/multi_index.hpp (100%) rename {composable_kernel/include => include}/ck/utility/number.hpp (100%) rename {composable_kernel/include => include}/ck/utility/random_gen.hpp (100%) rename {composable_kernel/include => include}/ck/utility/reduction_common.hpp (100%) rename {composable_kernel/include => include}/ck/utility/reduction_enums.hpp (100%) rename {composable_kernel/include => include}/ck/utility/reduction_functions_accumulate.hpp (100%) rename {composable_kernel/include => include}/ck/utility/reduction_operator.hpp (100%) rename {composable_kernel/include => include}/ck/utility/sequence.hpp (100%) rename {composable_kernel/include => include}/ck/utility/sequence_helper.hpp (100%) rename {composable_kernel/include => include}/ck/utility/span.hpp (100%) rename {composable_kernel/include => include}/ck/utility/static_buffer.hpp (100%) rename {composable_kernel/include => include}/ck/utility/statically_indexed_array.hpp (100%) rename {composable_kernel/include => include}/ck/utility/statically_indexed_array_multi_index.hpp (100%) rename {composable_kernel/include => include}/ck/utility/synchronization.hpp (100%) rename {composable_kernel/include => include}/ck/utility/thread_group.hpp (100%) rename {composable_kernel/include => include}/ck/utility/transpose_vectors.hpp (100%) rename {composable_kernel/include => include}/ck/utility/tuple.hpp (100%) rename {composable_kernel/include => include}/ck/utility/tuple_helper.hpp (100%) rename {composable_kernel/include => include}/ck/utility/type.hpp (100%) rename {composable_kernel/include => include}/ck/utility/type_convert.hpp (100%) rename {composable_kernel/include => include}/ck/utility/workgroup_barrier.hpp (100%) rename {composable_kernel/include => include}/ck/utility/workgroup_synchronization.hpp (100%) rename {composable_kernel/include => include}/ck/version.h.in (100%) rename {composable_kernel/library => library}/CMakeLists.txt (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/gemm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/gemv_splitk.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv2d_fwd_wmma_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/image_to_column.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/normalization.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/algorithm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/check_err.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/conv_common.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/convolution_parameter.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/device_memory.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/fill.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/host_common_util.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/host_gemm.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/host_tensor.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/host_tensor_generator.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/iterator.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/literals.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/numeric.hpp (100%) rename {composable_kernel/library => library}/include/ck/library/utility/ranges.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_1d_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_2d_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_3d_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemv_splitk/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_2d_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_kn_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_nk_mn_instance.cpp (100%) rename {composable_kernel/library => library}/src/utility/CMakeLists.txt (100%) rename {composable_kernel/library => library}/src/utility/convolution_parameter.cpp (100%) rename {composable_kernel/library => library}/src/utility/device_memory.cpp (100%) rename {composable_kernel/library => library}/src/utility/host_tensor.cpp (100%) rename {composable_kernel/profiler => profiler}/CMakeLists.txt (100%) rename {composable_kernel/profiler => profiler}/README.md (100%) rename {composable_kernel/profiler => profiler}/include/profiler/data_type_enum.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_avg_pool3d_bwd_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_batched_gemm_gemm_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_batched_gemm_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_batched_gemm_reduce_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_batchnorm_backward_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_batchnorm_forward_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_batchnorm_infer_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_contraction_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_contraction_utils.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_conv_bwd_data_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_conv_fwd_bias_relu_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_conv_fwd_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_conv_tensor_rearrange_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_elementwise_layernorm_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_add_fastgelu_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_add_multiply_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_bias_add_reduce_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_bilinear_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_fastgelu_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_multiply_add_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_reduce_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_splitk_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemm_streamk_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_gemv_splitk_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_grouped_conv_bwd_data_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_grouped_conv_fwd_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_grouped_gemm_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_groupnorm_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_layernorm_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_max_pool3d_bwd_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_pool3d_fwd_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_reduce_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_softmax_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/include/profiler/profile_tall_and_skinny_gemm_splitk_impl.hpp (100%) rename {composable_kernel/profiler => profiler}/src/CMakeLists.txt (100%) rename {composable_kernel/profiler => profiler}/src/profile_avg_pool3d_bwd.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_batched_gemm.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_batched_gemm_add_relu_gemm_add.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_batched_gemm_gemm.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_batched_gemm_multi_d.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_batched_gemm_reduce.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_batchnorm_bwd.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_batchnorm_fwd.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_batchnorm_infer.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_contraction_bilinear.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_contraction_scale.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_conv_bwd_data.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_conv_fwd.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_conv_fwd_bias_relu.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_conv_fwd_bias_relu_add.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_conv_tensor_rearrange.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm_add_add_fastgelu.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm_add_fastgelu.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm_add_multiply.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm_add_relu_add_layernorm.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm_bias_add_reduce.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm_bilinear.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm_fastgelu.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm_multiply_add.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm_reduce.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm_splitk.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemm_streamk.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_gemv_splitk.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_grouped_conv_bwd_data.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_grouped_conv_bwd_weight.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_grouped_conv_fwd.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_grouped_gemm.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_grouped_gemm_fastgelu.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_groupnorm.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_layernorm.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_max_pool3d_bwd.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_max_pool3d_fwd.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_reduce.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_softmax.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profile_tall_and_skinny_gemm_splitk.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profiler.cpp (100%) rename {composable_kernel/profiler => profiler}/src/profiler_operation_registry.hpp (100%) rename composable_kernel/rbuild.ini => rbuild.ini (100%) rename composable_kernel/requirements.txt => requirements.txt (100%) rename {composable_kernel/script => script}/check_copyright_year.sh (100%) rename {composable_kernel/script => script}/clang-format-overwrite.sh (100%) rename {composable_kernel/script => script}/cmake-ck-dev.sh (100%) rename {composable_kernel/script => script}/cmake-ck-release.sh (100%) rename {composable_kernel/script => script}/count_vgpr.sh (100%) rename {composable_kernel/script => script}/hipclang_opt.sh (100%) rename {composable_kernel/script => script}/install_precommit.sh (100%) rename {composable_kernel/script => script}/parse_perf_data.py (100%) rename {composable_kernel/script => script}/process_perf_data.py (100%) rename {composable_kernel/script => script}/process_perf_data.sh (100%) rename {composable_kernel/script => script}/process_qa_data.sh (100%) rename {composable_kernel/script => script}/profile_batched_gemm.sh (100%) rename {composable_kernel/script => script}/profile_conv_bwd_data.sh (100%) rename {composable_kernel/script => script}/profile_conv_fwd.sh (100%) rename {composable_kernel/script => script}/profile_gemm.sh (100%) rename {composable_kernel/script => script}/profile_gemm_bilinear.sh (100%) rename {composable_kernel/script => script}/profile_grouped_gemm.sh (100%) rename {composable_kernel/script => script}/profile_onnx_gemm.sh (100%) rename {composable_kernel/script => script}/profile_reduce_no_index.sh (100%) rename {composable_kernel/script => script}/profile_reduce_with_index.sh (100%) rename {composable_kernel/script => script}/profile_resnet50.sh (100%) rename {composable_kernel/script => script}/profile_splitK_gemm.sh (100%) rename {composable_kernel/script => script}/run_full_performance_tests.sh (100%) rename {composable_kernel/script => script}/run_performance_tests.sh (100%) rename {composable_kernel/script => script}/test_convnd_fwd.sh (100%) rename {composable_kernel/script => script}/test_reduce_no_index.sh (100%) rename {composable_kernel/script => script}/test_reduce_with_index.sh (100%) rename {composable_kernel/script => script}/uninstall_precommit.sh (100%) rename {composable_kernel/test => test}/CMakeLists.txt (100%) rename {composable_kernel/test => test}/batched_gemm/CMakeLists.txt (100%) rename {composable_kernel/test => test}/batched_gemm/batched_gemm_bf16.cpp (100%) rename {composable_kernel/test => test}/batched_gemm/batched_gemm_fp16.cpp (100%) rename {composable_kernel/test => test}/batched_gemm/batched_gemm_fp32.cpp (100%) rename {composable_kernel/test => test}/batched_gemm/batched_gemm_int8.cpp (100%) rename {composable_kernel/test => test}/batched_gemm/test_batched_gemm.cpp (100%) rename {composable_kernel/test => test}/batched_gemm_gemm/CMakeLists.txt (100%) rename {composable_kernel/test => test}/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp (100%) rename {composable_kernel/test => test}/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp (100%) rename {composable_kernel/test => test}/batched_gemm_multi_d/CMakeLists.txt (100%) rename {composable_kernel/test => test}/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp (100%) rename {composable_kernel/test => test}/batched_gemm_reduce/CMakeLists.txt (100%) rename {composable_kernel/test => test}/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp (100%) rename {composable_kernel/test => test}/batched_gemm_softmax_gemm/CMakeLists.txt (100%) rename {composable_kernel/test => test}/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp (100%) rename {composable_kernel/test => test}/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp (100%) rename {composable_kernel/test => test}/batched_gemm_softmax_gemm_permute/CMakeLists.txt (100%) rename {composable_kernel/test => test}/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp (100%) rename {composable_kernel/test => test}/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp (100%) rename {composable_kernel/test => test}/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp (100%) rename {composable_kernel/test => test}/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp (100%) rename {composable_kernel/test => test}/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp (100%) rename {composable_kernel/test => test}/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp (100%) rename {composable_kernel/test => test}/batchnorm/CMakeLists.txt (100%) rename {composable_kernel/test => test}/batchnorm/batchnorm_bwd_rank_4.cpp (100%) rename {composable_kernel/test => test}/batchnorm/batchnorm_fwd_rank_4.cpp (100%) rename {composable_kernel/test => test}/batchnorm/batchnorm_infer_rank_4.cpp (100%) rename {composable_kernel/test => test}/block_swizzle_test/block_swizzle_test.cpp (100%) rename {composable_kernel/test => test}/block_swizzle_test/rebuild.sh (100%) rename {composable_kernel/test => test}/block_swizzle_test/simple_args.h (100%) rename {composable_kernel/test => test}/block_to_ctile_map/CMakeLists.txt (100%) rename {composable_kernel/test => test}/block_to_ctile_map/test_block_to_ctile_map.cpp (100%) rename {composable_kernel/test => test}/contraction/CMakeLists.txt (100%) rename {composable_kernel/test => test}/contraction/test_contraction.cpp (100%) rename {composable_kernel/test => test}/contraction/test_contraction_interface.cpp (100%) rename {composable_kernel/test => test}/conv_tensor_rearrange/CMakeLists.txt (100%) rename {composable_kernel/test => test}/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp (100%) rename {composable_kernel/test => test}/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp (100%) rename {composable_kernel/test => test}/conv_util/CMakeLists.txt (100%) rename {composable_kernel/test => test}/conv_util/conv_util.cpp (100%) rename {composable_kernel/test => test}/convnd_bwd_data/CMakeLists.txt (100%) rename {composable_kernel/test => test}/convnd_bwd_data/convnd_bwd_data.cpp (100%) rename {composable_kernel/test => test}/convnd_fwd/CMakeLists.txt (100%) rename {composable_kernel/test => test}/convnd_fwd/convnd_fwd.cpp (100%) rename {composable_kernel/test => test}/data_type/CMakeLists.txt (100%) rename {composable_kernel/test => test}/data_type/test_bf8.cpp (100%) rename {composable_kernel/test => test}/data_type/test_fp8.cpp (100%) rename {composable_kernel/test => test}/data_type/test_int4.cpp (100%) rename {composable_kernel/test => test}/data_type/type_convert_const.cpp (100%) rename {composable_kernel/test => test}/elementwise_normalization/CMakeLists.txt (100%) rename {composable_kernel/test => test}/elementwise_normalization/test_elementwise_layernorm_fp16.cpp (100%) rename {composable_kernel/test => test}/gemm/CMakeLists.txt (100%) rename {composable_kernel/test => test}/gemm/gemm_bf16.cpp (100%) rename {composable_kernel/test => test}/gemm/gemm_fp16.cpp (100%) rename {composable_kernel/test => test}/gemm/gemm_fp32.cpp (100%) rename {composable_kernel/test => test}/gemm/gemm_fp64.cpp (100%) rename {composable_kernel/test => test}/gemm/gemm_int8.cpp (100%) rename {composable_kernel/test => test}/gemm/gemm_standalone_xdl_fp16.cpp (100%) rename {composable_kernel/test => test}/gemm/gemm_util.hpp (100%) rename {composable_kernel/test => test}/gemm/instance/gemm_f16_nn_instance.cpp (100%) rename {composable_kernel/test => test}/gemm/instance/gemm_f16_nn_instance.hpp (100%) rename {composable_kernel/test => test}/gemm/instance/gemm_f16_nt_instance.cpp (100%) rename {composable_kernel/test => test}/gemm/instance/gemm_f16_nt_instance.hpp (100%) rename {composable_kernel/test => test}/gemm/instance/gemm_f16_tn_instance.cpp (100%) rename {composable_kernel/test => test}/gemm/instance/gemm_f16_tn_instance.hpp (100%) rename {composable_kernel/test => test}/gemm/instance/gemm_f16_tt_instance.cpp (100%) rename {composable_kernel/test => test}/gemm/instance/gemm_f16_tt_instance.hpp (100%) rename {composable_kernel/test => test}/gemm/instance/gemm_wavelet_f16_tn_instance.cpp (100%) rename {composable_kernel/test => test}/gemm/instance/gemm_wavelet_f16_tn_instance.hpp (100%) rename {composable_kernel/test => test}/gemm/run_gemm_test.inc (100%) rename {composable_kernel/test => test}/gemm_layernorm/CMakeLists.txt (100%) rename {composable_kernel/test => test}/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp (100%) rename {composable_kernel/test => test}/gemm_reduce/CMakeLists.txt (100%) rename {composable_kernel/test => test}/gemm_reduce/gemm_reduce_fp16.cpp (100%) rename {composable_kernel/test => test}/gemm_split_k/CMakeLists.txt (100%) rename {composable_kernel/test => test}/gemm_split_k/test_gemm_splitk.cpp (100%) rename {composable_kernel/test => test}/gemm_split_k/test_gemm_splitk_ut_cases.inc (100%) rename {composable_kernel/test => test}/gemm_split_k/test_gemm_splitk_util.hpp (100%) rename {composable_kernel/test => test}/grouped_convnd_bwd_data/CMakeLists.txt (100%) rename {composable_kernel/test => test}/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp (100%) rename {composable_kernel/test => test}/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp (100%) rename {composable_kernel/test => test}/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp (100%) rename {composable_kernel/test => test}/grouped_convnd_bwd_weight/CMakeLists.txt (100%) rename {composable_kernel/test => test}/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp (100%) rename {composable_kernel/test => test}/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp (100%) rename {composable_kernel/test => test}/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp (100%) rename {composable_kernel/test => test}/grouped_convnd_fwd/CMakeLists.txt (100%) rename {composable_kernel/test => test}/grouped_convnd_fwd/grouped_convnd_fwd.cpp (100%) rename {composable_kernel/test => test}/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp (100%) rename {composable_kernel/test => test}/grouped_gemm/CMakeLists.txt (100%) rename {composable_kernel/test => test}/grouped_gemm/test_grouped_gemm_interface.cpp (100%) rename {composable_kernel/test => test}/grouped_gemm/test_grouped_gemm_splitk.cpp (100%) rename {composable_kernel/test => test}/grouped_gemm/test_grouped_gemm_ut_cases.inc (100%) rename {composable_kernel/test => test}/grouped_gemm/test_grouped_gemm_util.hpp (100%) rename {composable_kernel/test => test}/image_to_column/CMakeLists.txt (100%) rename {composable_kernel/test => test}/image_to_column/test_image_to_column.cpp (100%) rename {composable_kernel/test => test}/image_to_column/test_image_to_column_interface.cpp (100%) rename {composable_kernel/test => test}/magic_number_division/CMakeLists.txt (100%) rename {composable_kernel/test => test}/magic_number_division/magic_number_division.cpp (100%) rename {composable_kernel/test => test}/normalization/CMakeLists.txt (100%) rename {composable_kernel/test => test}/normalization/test_groupnorm_fp16.cpp (100%) rename {composable_kernel/test => test}/normalization/test_groupnorm_fp32.cpp (100%) rename {composable_kernel/test => test}/normalization/test_layernorm2d_fp16.cpp (100%) rename {composable_kernel/test => test}/normalization/test_layernorm2d_fp32.cpp (100%) rename {composable_kernel/test => test}/pool/CMakeLists.txt (100%) rename {composable_kernel/test => test}/pool/test_avg_pool3d_bwd.cpp (100%) rename {composable_kernel/test => test}/pool/test_avg_pool3d_fwd.cpp (100%) rename {composable_kernel/test => test}/pool/test_max_pool3d_bwd.cpp (100%) rename {composable_kernel/test => test}/pool/test_max_pool3d_fwd.cpp (100%) rename {composable_kernel/test => test}/pool/test_pool_fwd_common.hpp (100%) rename {composable_kernel/test => test}/reduce/CMakeLists.txt (100%) rename {composable_kernel/test => test}/reduce/reduce_no_index.cpp (100%) rename {composable_kernel/test => test}/reduce/reduce_with_index.cpp (100%) rename {composable_kernel/test => test}/reference_conv_fwd/CMakeLists.txt (100%) rename {composable_kernel/test => test}/reference_conv_fwd/reference_conv_fwd.cpp (100%) rename {composable_kernel/test => test}/softmax/CMakeLists.txt (100%) rename {composable_kernel/test => test}/softmax/test_softmax_interface.cpp (100%) rename {composable_kernel/test => test}/softmax/test_softmax_rank3.cpp (100%) rename {composable_kernel/test => test}/softmax/test_softmax_rank4.cpp (100%) rename {composable_kernel/test => test}/softmax/test_softmax_ut_cases.inc (100%) rename {composable_kernel/test => test}/softmax/test_softmax_util.hpp (100%) rename {composable_kernel/test => test}/space_filling_curve/CMakeLists.txt (100%) rename {composable_kernel/test => test}/space_filling_curve/space_filling_curve.cpp (100%) rename {composable_kernel/test => test}/wmma_op/CMakeLists.txt (100%) rename {composable_kernel/test => test}/wmma_op/wmma_op.cpp (100%) rename {composable_kernel/test => test}/wmma_op/wmma_op_util.hpp (100%) diff --git a/composable_kernel/CHANGELOG.md b/CHANGELOG.md similarity index 100% rename from composable_kernel/CHANGELOG.md rename to CHANGELOG.md diff --git a/composable_kernel/CITATION.cff b/CITATION.cff similarity index 100% rename from composable_kernel/CITATION.cff rename to CITATION.cff diff --git a/composable_kernel/CMakeLists.txt b/CMakeLists.txt similarity index 100% rename from composable_kernel/CMakeLists.txt rename to CMakeLists.txt diff --git a/composable_kernel/CONTRIBUTORS.md b/CONTRIBUTORS.md similarity index 100% rename from composable_kernel/CONTRIBUTORS.md rename to CONTRIBUTORS.md diff --git a/composable_kernel/Config.cmake.in b/Config.cmake.in similarity index 100% rename from composable_kernel/Config.cmake.in rename to Config.cmake.in diff --git a/composable_kernel/Dockerfile b/Dockerfile similarity index 100% rename from composable_kernel/Dockerfile rename to Dockerfile diff --git a/composable_kernel/Jenkinsfile b/Jenkinsfile similarity index 100% rename from composable_kernel/Jenkinsfile rename to Jenkinsfile diff --git a/composable_kernel/LICENSE b/LICENSE similarity index 100% rename from composable_kernel/LICENSE rename to LICENSE diff --git a/composable_kernel/README.md b/README.md similarity index 100% rename from composable_kernel/README.md rename to README.md diff --git a/composable_kernel/client_example/01_gemm/CMakeLists.txt b/client_example/01_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/01_gemm/CMakeLists.txt rename to client_example/01_gemm/CMakeLists.txt diff --git a/composable_kernel/client_example/01_gemm/gemm.cpp b/client_example/01_gemm/gemm.cpp similarity index 100% rename from composable_kernel/client_example/01_gemm/gemm.cpp rename to client_example/01_gemm/gemm.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt b/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt rename to client_example/02_gemm_add_add_fastgelu/CMakeLists.txt diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp diff --git a/composable_kernel/client_example/03_gemm_layernorm/CMakeLists.txt b/client_example/03_gemm_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/03_gemm_layernorm/CMakeLists.txt rename to client_example/03_gemm_layernorm/CMakeLists.txt diff --git a/composable_kernel/client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp b/client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp similarity index 100% rename from composable_kernel/client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp rename to client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp diff --git a/composable_kernel/client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp b/client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp similarity index 100% rename from composable_kernel/client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp rename to client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp diff --git a/composable_kernel/client_example/04_contraction/CMakeLists.txt b/client_example/04_contraction/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/04_contraction/CMakeLists.txt rename to client_example/04_contraction/CMakeLists.txt diff --git a/composable_kernel/client_example/04_contraction/contraction_bilinear_fp32.cpp b/client_example/04_contraction/contraction_bilinear_fp32.cpp similarity index 100% rename from composable_kernel/client_example/04_contraction/contraction_bilinear_fp32.cpp rename to client_example/04_contraction/contraction_bilinear_fp32.cpp diff --git a/composable_kernel/client_example/04_contraction/contraction_bilinear_fp64.cpp b/client_example/04_contraction/contraction_bilinear_fp64.cpp similarity index 100% rename from composable_kernel/client_example/04_contraction/contraction_bilinear_fp64.cpp rename to client_example/04_contraction/contraction_bilinear_fp64.cpp diff --git a/composable_kernel/client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp b/client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp similarity index 100% rename from composable_kernel/client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp rename to client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp diff --git a/composable_kernel/client_example/04_contraction/contraction_scale_fp32.cpp b/client_example/04_contraction/contraction_scale_fp32.cpp similarity index 100% rename from composable_kernel/client_example/04_contraction/contraction_scale_fp32.cpp rename to client_example/04_contraction/contraction_scale_fp32.cpp diff --git a/composable_kernel/client_example/04_contraction/contraction_scale_fp64.cpp b/client_example/04_contraction/contraction_scale_fp64.cpp similarity index 100% rename from composable_kernel/client_example/04_contraction/contraction_scale_fp64.cpp rename to client_example/04_contraction/contraction_scale_fp64.cpp diff --git a/composable_kernel/client_example/05_layernorm/CMakeLists.txt b/client_example/05_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/05_layernorm/CMakeLists.txt rename to client_example/05_layernorm/CMakeLists.txt diff --git a/composable_kernel/client_example/05_layernorm/layernorm2d.cpp b/client_example/05_layernorm/layernorm2d.cpp similarity index 100% rename from composable_kernel/client_example/05_layernorm/layernorm2d.cpp rename to client_example/05_layernorm/layernorm2d.cpp diff --git a/composable_kernel/client_example/06_softmax/CMakeLists.txt b/client_example/06_softmax/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/06_softmax/CMakeLists.txt rename to client_example/06_softmax/CMakeLists.txt diff --git a/composable_kernel/client_example/06_softmax/softmax4d.cpp b/client_example/06_softmax/softmax4d.cpp similarity index 100% rename from composable_kernel/client_example/06_softmax/softmax4d.cpp rename to client_example/06_softmax/softmax4d.cpp diff --git a/composable_kernel/client_example/07_grouped_convnd_fwd/CMakeLists.txt b/client_example/07_grouped_convnd_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/07_grouped_convnd_fwd/CMakeLists.txt rename to client_example/07_grouped_convnd_fwd/CMakeLists.txt diff --git a/composable_kernel/client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp b/client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp similarity index 100% rename from composable_kernel/client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp rename to client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp diff --git a/composable_kernel/client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp b/client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp similarity index 100% rename from composable_kernel/client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp rename to client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp diff --git a/composable_kernel/client_example/08_fused_attention/CMakeLists.txt b/client_example/08_fused_attention/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/08_fused_attention/CMakeLists.txt rename to client_example/08_fused_attention/CMakeLists.txt diff --git a/composable_kernel/client_example/08_fused_attention/fused_attention.cpp b/client_example/08_fused_attention/fused_attention.cpp similarity index 100% rename from composable_kernel/client_example/08_fused_attention/fused_attention.cpp rename to client_example/08_fused_attention/fused_attention.cpp diff --git a/composable_kernel/client_example/08_fused_attention/fused_attention_bias.cpp b/client_example/08_fused_attention/fused_attention_bias.cpp similarity index 100% rename from composable_kernel/client_example/08_fused_attention/fused_attention_bias.cpp rename to client_example/08_fused_attention/fused_attention_bias.cpp diff --git a/composable_kernel/client_example/09_quantization/CMakeLists.txt b/client_example/09_quantization/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/09_quantization/CMakeLists.txt rename to client_example/09_quantization/CMakeLists.txt diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/gemm_quantization.cpp b/client_example/09_quantization/gemm_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/gemm_quantization.cpp rename to client_example/09_quantization/gemm_quantization.cpp diff --git a/composable_kernel/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt b/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt rename to client_example/10_grouped_convnd_bwd_data/CMakeLists.txt diff --git a/composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp similarity index 100% rename from composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp rename to client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp diff --git a/composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp similarity index 100% rename from composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp rename to client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp diff --git a/composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp similarity index 100% rename from composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp rename to client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt b/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt rename to client_example/11_grouped_conv_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/common.hpp b/client_example/11_grouped_conv_bwd_weight/common.hpp similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/common.hpp rename to client_example/11_grouped_conv_bwd_weight/common.hpp diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp rename to client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp rename to client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp rename to client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp rename to client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp diff --git a/composable_kernel/client_example/12_elementwise_normalization/CMakeLists.txt b/client_example/12_elementwise_normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/12_elementwise_normalization/CMakeLists.txt rename to client_example/12_elementwise_normalization/CMakeLists.txt diff --git a/composable_kernel/client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp b/client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp similarity index 100% rename from composable_kernel/client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp rename to client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp diff --git a/composable_kernel/client_example/13_batchnorm/CMakeLists.txt b/client_example/13_batchnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/13_batchnorm/CMakeLists.txt rename to client_example/13_batchnorm/CMakeLists.txt diff --git a/composable_kernel/client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp b/client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp similarity index 100% rename from composable_kernel/client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp rename to client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp diff --git a/composable_kernel/client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp b/client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp similarity index 100% rename from composable_kernel/client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp rename to client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp diff --git a/composable_kernel/client_example/13_batchnorm/batchnorm_infer_nhwc.cpp b/client_example/13_batchnorm/batchnorm_infer_nhwc.cpp similarity index 100% rename from composable_kernel/client_example/13_batchnorm/batchnorm_infer_nhwc.cpp rename to client_example/13_batchnorm/batchnorm_infer_nhwc.cpp diff --git a/composable_kernel/client_example/14_instance_id/CMakeLists.txt b/client_example/14_instance_id/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/14_instance_id/CMakeLists.txt rename to client_example/14_instance_id/CMakeLists.txt diff --git a/composable_kernel/client_example/14_instance_id/batchnorm_fwd_instance_id.cpp b/client_example/14_instance_id/batchnorm_fwd_instance_id.cpp similarity index 100% rename from composable_kernel/client_example/14_instance_id/batchnorm_fwd_instance_id.cpp rename to client_example/14_instance_id/batchnorm_fwd_instance_id.cpp diff --git a/composable_kernel/client_example/15_convnd_bwd_data/CMakeLists.txt b/client_example/15_convnd_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/15_convnd_bwd_data/CMakeLists.txt rename to client_example/15_convnd_bwd_data/CMakeLists.txt diff --git a/composable_kernel/client_example/15_convnd_bwd_data/common.hpp b/client_example/15_convnd_bwd_data/common.hpp similarity index 100% rename from composable_kernel/client_example/15_convnd_bwd_data/common.hpp rename to client_example/15_convnd_bwd_data/common.hpp diff --git a/composable_kernel/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp b/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp similarity index 100% rename from composable_kernel/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp rename to client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp diff --git a/composable_kernel/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp b/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp similarity index 100% rename from composable_kernel/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp rename to client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp diff --git a/composable_kernel/client_example/15_gemm_add_multiply/CMakeLists.txt b/client_example/15_gemm_add_multiply/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/15_gemm_add_multiply/CMakeLists.txt rename to client_example/15_gemm_add_multiply/CMakeLists.txt diff --git a/composable_kernel/client_example/15_gemm_add_multiply/gemm_add_multiply.cpp b/client_example/15_gemm_add_multiply/gemm_add_multiply.cpp similarity index 100% rename from composable_kernel/client_example/15_gemm_add_multiply/gemm_add_multiply.cpp rename to client_example/15_gemm_add_multiply/gemm_add_multiply.cpp diff --git a/composable_kernel/client_example/15_reduce/CMakeLists.txt b/client_example/15_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/15_reduce/CMakeLists.txt rename to client_example/15_reduce/CMakeLists.txt diff --git a/composable_kernel/client_example/15_reduce/reduce_nhwc_c.cpp b/client_example/15_reduce/reduce_nhwc_c.cpp similarity index 100% rename from composable_kernel/client_example/15_reduce/reduce_nhwc_c.cpp rename to client_example/15_reduce/reduce_nhwc_c.cpp diff --git a/composable_kernel/client_example/16_convnd_fwd/CMakeLists.txt b/client_example/16_convnd_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/16_convnd_fwd/CMakeLists.txt rename to client_example/16_convnd_fwd/CMakeLists.txt diff --git a/composable_kernel/client_example/16_convnd_fwd/common.hpp b/client_example/16_convnd_fwd/common.hpp similarity index 100% rename from composable_kernel/client_example/16_convnd_fwd/common.hpp rename to client_example/16_convnd_fwd/common.hpp diff --git a/composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp similarity index 100% rename from composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp rename to client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp diff --git a/composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp similarity index 100% rename from composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp rename to client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp diff --git a/composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp similarity index 100% rename from composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp rename to client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp diff --git a/composable_kernel/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt b/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt rename to client_example/17_grouped_gemm_fastgelu/CMakeLists.txt diff --git a/composable_kernel/client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp b/client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp similarity index 100% rename from composable_kernel/client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp rename to client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp diff --git a/composable_kernel/client_example/18_groupnorm/CMakeLists.txt b/client_example/18_groupnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/18_groupnorm/CMakeLists.txt rename to client_example/18_groupnorm/CMakeLists.txt diff --git a/composable_kernel/client_example/18_groupnorm/groupnorm_swish.cpp b/client_example/18_groupnorm/groupnorm_swish.cpp similarity index 100% rename from composable_kernel/client_example/18_groupnorm/groupnorm_swish.cpp rename to client_example/18_groupnorm/groupnorm_swish.cpp diff --git a/composable_kernel/client_example/19_pool/CMakeLists.txt b/client_example/19_pool/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/19_pool/CMakeLists.txt rename to client_example/19_pool/CMakeLists.txt diff --git a/composable_kernel/client_example/19_pool/avg_pool3d_bwd.cpp b/client_example/19_pool/avg_pool3d_bwd.cpp similarity index 100% rename from composable_kernel/client_example/19_pool/avg_pool3d_bwd.cpp rename to client_example/19_pool/avg_pool3d_bwd.cpp diff --git a/composable_kernel/client_example/19_pool/avg_pool3d_fwd.cpp b/client_example/19_pool/avg_pool3d_fwd.cpp similarity index 100% rename from composable_kernel/client_example/19_pool/avg_pool3d_fwd.cpp rename to client_example/19_pool/avg_pool3d_fwd.cpp diff --git a/composable_kernel/client_example/19_pool/max_pool2d_bwd.cpp b/client_example/19_pool/max_pool2d_bwd.cpp similarity index 100% rename from composable_kernel/client_example/19_pool/max_pool2d_bwd.cpp rename to client_example/19_pool/max_pool2d_bwd.cpp diff --git a/composable_kernel/client_example/19_pool/max_pool2d_fwd.cpp b/client_example/19_pool/max_pool2d_fwd.cpp similarity index 100% rename from composable_kernel/client_example/19_pool/max_pool2d_fwd.cpp rename to client_example/19_pool/max_pool2d_fwd.cpp diff --git a/composable_kernel/client_example/20_splitk_gemm/CMakeLists.txt b/client_example/20_splitk_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/20_splitk_gemm/CMakeLists.txt rename to client_example/20_splitk_gemm/CMakeLists.txt diff --git a/composable_kernel/client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp b/client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp similarity index 100% rename from composable_kernel/client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp rename to client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp diff --git a/composable_kernel/client_example/21_grouped_gemm_bias/CMakeLists.txt b/client_example/21_grouped_gemm_bias/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/21_grouped_gemm_bias/CMakeLists.txt rename to client_example/21_grouped_gemm_bias/CMakeLists.txt diff --git a/composable_kernel/client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp b/client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp similarity index 100% rename from composable_kernel/client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp rename to client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp diff --git a/composable_kernel/client_example/22_grouped_gemm/CMakeLists.txt b/client_example/22_grouped_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/22_grouped_gemm/CMakeLists.txt rename to client_example/22_grouped_gemm/CMakeLists.txt diff --git a/composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp similarity index 100% rename from composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp rename to client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp diff --git a/composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp similarity index 100% rename from composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp rename to client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp diff --git a/composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp similarity index 100% rename from composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp rename to client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp diff --git a/composable_kernel/client_example/22_im2col_col2im/CMakeLists.txt b/client_example/22_im2col_col2im/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/22_im2col_col2im/CMakeLists.txt rename to client_example/22_im2col_col2im/CMakeLists.txt diff --git a/composable_kernel/client_example/22_im2col_col2im/column_to_image.cpp b/client_example/22_im2col_col2im/column_to_image.cpp similarity index 100% rename from composable_kernel/client_example/22_im2col_col2im/column_to_image.cpp rename to client_example/22_im2col_col2im/column_to_image.cpp diff --git a/composable_kernel/client_example/22_im2col_col2im/image_to_column.cpp b/client_example/22_im2col_col2im/image_to_column.cpp similarity index 100% rename from composable_kernel/client_example/22_im2col_col2im/image_to_column.cpp rename to client_example/22_im2col_col2im/image_to_column.cpp diff --git a/composable_kernel/client_example/CMakeLists.txt b/client_example/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/CMakeLists.txt rename to client_example/CMakeLists.txt diff --git a/composable_kernel/client_example/README.md b/client_example/README.md similarity index 100% rename from composable_kernel/client_example/README.md rename to client_example/README.md diff --git a/composable_kernel/cmake/Analyzers.cmake b/cmake/Analyzers.cmake similarity index 100% rename from composable_kernel/cmake/Analyzers.cmake rename to cmake/Analyzers.cmake diff --git a/composable_kernel/cmake/ClangTidy.cmake b/cmake/ClangTidy.cmake similarity index 100% rename from composable_kernel/cmake/ClangTidy.cmake rename to cmake/ClangTidy.cmake diff --git a/composable_kernel/cmake/CppCheck.cmake b/cmake/CppCheck.cmake similarity index 100% rename from composable_kernel/cmake/CppCheck.cmake rename to cmake/CppCheck.cmake diff --git a/composable_kernel/cmake/DoxygenDoc.cmake b/cmake/DoxygenDoc.cmake similarity index 100% rename from composable_kernel/cmake/DoxygenDoc.cmake rename to cmake/DoxygenDoc.cmake diff --git a/composable_kernel/cmake/EnableCompilerWarnings.cmake b/cmake/EnableCompilerWarnings.cmake similarity index 100% rename from composable_kernel/cmake/EnableCompilerWarnings.cmake rename to cmake/EnableCompilerWarnings.cmake diff --git a/composable_kernel/cmake/TargetFlags.cmake b/cmake/TargetFlags.cmake similarity index 100% rename from composable_kernel/cmake/TargetFlags.cmake rename to cmake/TargetFlags.cmake diff --git a/composable_kernel/cmake/googletest.cmake b/cmake/googletest.cmake similarity index 100% rename from composable_kernel/cmake/googletest.cmake rename to cmake/googletest.cmake diff --git a/composable_kernel/dev-requirements.txt b/dev-requirements.txt similarity index 100% rename from composable_kernel/dev-requirements.txt rename to dev-requirements.txt diff --git a/composable_kernel/docs/API_Reference_Guide.rst b/docs/API_Reference_Guide.rst similarity index 100% rename from composable_kernel/docs/API_Reference_Guide.rst rename to docs/API_Reference_Guide.rst diff --git a/composable_kernel/docs/Contributors_Guide.rst b/docs/Contributors_Guide.rst similarity index 100% rename from composable_kernel/docs/Contributors_Guide.rst rename to docs/Contributors_Guide.rst diff --git a/composable_kernel/docs/Supported_Primitives_Guide.rst b/docs/Supported_Primitives_Guide.rst similarity index 100% rename from composable_kernel/docs/Supported_Primitives_Guide.rst rename to docs/Supported_Primitives_Guide.rst diff --git a/composable_kernel/docs/conf.py b/docs/conf.py similarity index 100% rename from composable_kernel/docs/conf.py rename to docs/conf.py diff --git a/composable_kernel/docs/data/ck_component.png b/docs/data/ck_component.png similarity index 100% rename from composable_kernel/docs/data/ck_component.png rename to docs/data/ck_component.png diff --git a/composable_kernel/docs/data/ck_layer.png b/docs/data/ck_layer.png similarity index 100% rename from composable_kernel/docs/data/ck_layer.png rename to docs/data/ck_layer.png diff --git a/composable_kernel/docs/dockerhub.rst b/docs/dockerhub.rst similarity index 100% rename from composable_kernel/docs/dockerhub.rst rename to docs/dockerhub.rst diff --git a/composable_kernel/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile similarity index 100% rename from composable_kernel/docs/doxygen/Doxyfile rename to docs/doxygen/Doxyfile diff --git a/composable_kernel/docs/index.rst b/docs/index.rst similarity index 100% rename from composable_kernel/docs/index.rst rename to docs/index.rst diff --git a/composable_kernel/docs/license.rst b/docs/license.rst similarity index 100% rename from composable_kernel/docs/license.rst rename to docs/license.rst diff --git a/composable_kernel/docs/refs.bib b/docs/refs.bib similarity index 100% rename from composable_kernel/docs/refs.bib rename to docs/refs.bib diff --git a/composable_kernel/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in similarity index 100% rename from composable_kernel/docs/sphinx/_toc.yml.in rename to docs/sphinx/_toc.yml.in diff --git a/composable_kernel/docs/sphinx/requirements.in b/docs/sphinx/requirements.in similarity index 100% rename from composable_kernel/docs/sphinx/requirements.in rename to docs/sphinx/requirements.in diff --git a/composable_kernel/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt similarity index 100% rename from composable_kernel/docs/sphinx/requirements.txt rename to docs/sphinx/requirements.txt diff --git a/composable_kernel/docs/tutorial_hello_world.rst b/docs/tutorial_hello_world.rst similarity index 100% rename from composable_kernel/docs/tutorial_hello_world.rst rename to docs/tutorial_hello_world.rst diff --git a/composable_kernel/example/01_gemm/CMakeLists.txt b/example/01_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/01_gemm/CMakeLists.txt rename to example/01_gemm/CMakeLists.txt diff --git a/composable_kernel/example/01_gemm/README.md b/example/01_gemm/README.md similarity index 100% rename from composable_kernel/example/01_gemm/README.md rename to example/01_gemm/README.md diff --git a/composable_kernel/example/01_gemm/common.hpp b/example/01_gemm/common.hpp similarity index 100% rename from composable_kernel/example/01_gemm/common.hpp rename to example/01_gemm/common.hpp diff --git a/composable_kernel/example/01_gemm/gemm_dl_fp16.cpp b/example/01_gemm/gemm_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_dl_fp16.cpp rename to example/01_gemm/gemm_dl_fp16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_dl_fp32.cpp b/example/01_gemm/gemm_dl_fp32.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_dl_fp32.cpp rename to example/01_gemm/gemm_dl_fp32.cpp diff --git a/composable_kernel/example/01_gemm/gemm_dl_int4.cpp b/example/01_gemm/gemm_dl_int4.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_dl_int4.cpp rename to example/01_gemm/gemm_dl_int4.cpp diff --git a/composable_kernel/example/01_gemm/gemm_dl_int8.cpp b/example/01_gemm/gemm_dl_int8.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_dl_int8.cpp rename to example/01_gemm/gemm_dl_int8.cpp diff --git a/composable_kernel/example/01_gemm/gemm_dpp_fp16.cpp b/example/01_gemm/gemm_dpp_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_dpp_fp16.cpp rename to example/01_gemm/gemm_dpp_fp16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_wmma_fp16.cpp b/example/01_gemm/gemm_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_wmma_fp16.cpp rename to example/01_gemm/gemm_wmma_fp16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_bf16.cpp b/example/01_gemm/gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_bf16.cpp rename to example/01_gemm/gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_bf16_rtn.cpp b/example/01_gemm/gemm_xdl_bf16_rtn.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_bf16_rtn.cpp rename to example/01_gemm/gemm_xdl_bf16_rtn.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_fp16.cpp b/example/01_gemm/gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_fp16.cpp rename to example/01_gemm/gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_fp16_fp8.cpp b/example/01_gemm/gemm_xdl_fp16_fp8.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_fp16_fp8.cpp rename to example/01_gemm/gemm_xdl_fp16_fp8.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_fp64.cpp b/example/01_gemm/gemm_xdl_fp64.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_fp64.cpp rename to example/01_gemm/gemm_xdl_fp64.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_fp8.cpp b/example/01_gemm/gemm_xdl_fp8.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_fp8.cpp rename to example/01_gemm/gemm_xdl_fp8.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_fp8_bf8.cpp b/example/01_gemm/gemm_xdl_fp8_bf8.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_fp8_bf8.cpp rename to example/01_gemm/gemm_xdl_fp8_bf8.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_int4.cpp b/example/01_gemm/gemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_int4.cpp rename to example/01_gemm/gemm_xdl_int4.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_int8.cpp b/example/01_gemm/gemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_int8.cpp rename to example/01_gemm/gemm_xdl_int8.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp b/example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp rename to example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_streamk.cpp b/example/01_gemm/gemm_xdl_streamk.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_streamk.cpp rename to example/01_gemm/gemm_xdl_streamk.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_wavelet_fp16.cpp b/example/01_gemm/gemm_xdl_wavelet_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_wavelet_fp16.cpp rename to example/01_gemm/gemm_xdl_wavelet_fp16.cpp diff --git a/composable_kernel/example/01_gemm/run_gemm_example.inc b/example/01_gemm/run_gemm_example.inc similarity index 100% rename from composable_kernel/example/01_gemm/run_gemm_example.inc rename to example/01_gemm/run_gemm_example.inc diff --git a/composable_kernel/example/02_gemm_bilinear/CMakeLists.txt b/example/02_gemm_bilinear/CMakeLists.txt similarity index 100% rename from composable_kernel/example/02_gemm_bilinear/CMakeLists.txt rename to example/02_gemm_bilinear/CMakeLists.txt diff --git a/composable_kernel/example/02_gemm_bilinear/README.md b/example/02_gemm_bilinear/README.md similarity index 100% rename from composable_kernel/example/02_gemm_bilinear/README.md rename to example/02_gemm_bilinear/README.md diff --git a/composable_kernel/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp rename to example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp diff --git a/composable_kernel/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp similarity index 100% rename from composable_kernel/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp rename to example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp diff --git a/composable_kernel/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp b/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp rename to example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp diff --git a/composable_kernel/example/03_gemm_bias_relu/CMakeLists.txt b/example/03_gemm_bias_relu/CMakeLists.txt similarity index 100% rename from composable_kernel/example/03_gemm_bias_relu/CMakeLists.txt rename to example/03_gemm_bias_relu/CMakeLists.txt diff --git a/composable_kernel/example/03_gemm_bias_relu/README.md b/example/03_gemm_bias_relu/README.md similarity index 100% rename from composable_kernel/example/03_gemm_bias_relu/README.md rename to example/03_gemm_bias_relu/README.md diff --git a/composable_kernel/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp b/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp rename to example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/CMakeLists.txt b/example/04_gemm_add_add_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/CMakeLists.txt rename to example/04_gemm_add_add_fastgelu/CMakeLists.txt diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/README.md b/example/04_gemm_add_add_fastgelu/README.md similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/README.md rename to example/04_gemm_add_add_fastgelu/README.md diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/common.hpp b/example/04_gemm_add_add_fastgelu/common.hpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/common.hpp rename to example/04_gemm_add_add_fastgelu/common.hpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp rename to example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp rename to example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp rename to example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp rename to example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp rename to example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc b/example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc rename to example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc diff --git a/composable_kernel/example/09_convnd_fwd/CMakeLists.txt b/example/09_convnd_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/09_convnd_fwd/CMakeLists.txt rename to example/09_convnd_fwd/CMakeLists.txt diff --git a/composable_kernel/example/09_convnd_fwd/README.md b/example/09_convnd_fwd/README.md similarity index 100% rename from composable_kernel/example/09_convnd_fwd/README.md rename to example/09_convnd_fwd/README.md diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_common.hpp b/example/09_convnd_fwd/convnd_fwd_common.hpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_common.hpp rename to example/09_convnd_fwd/convnd_fwd_common.hpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_common.hpp b/example/09_convnd_fwd/convnd_fwd_dl_common.hpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_common.hpp rename to example/09_convnd_fwd/convnd_fwd_dl_common.hpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp b/example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp rename to example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp b/example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp rename to example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_int8.cpp b/example/09_convnd_fwd/convnd_fwd_dl_int8.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_int8.cpp rename to example/09_convnd_fwd/convnd_fwd_dl_int8.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp rename to example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp rename to example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp rename to example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp rename to example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp rename to example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp diff --git a/composable_kernel/example/09_convnd_fwd/run_convnd_fwd_dl_example.inc b/example/09_convnd_fwd/run_convnd_fwd_dl_example.inc similarity index 100% rename from composable_kernel/example/09_convnd_fwd/run_convnd_fwd_dl_example.inc rename to example/09_convnd_fwd/run_convnd_fwd_dl_example.inc diff --git a/composable_kernel/example/09_convnd_fwd/run_convnd_fwd_example.inc b/example/09_convnd_fwd/run_convnd_fwd_example.inc similarity index 100% rename from composable_kernel/example/09_convnd_fwd/run_convnd_fwd_example.inc rename to example/09_convnd_fwd/run_convnd_fwd_example.inc diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt b/example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt rename to example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc b/example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc rename to example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc diff --git a/composable_kernel/example/12_reduce/CMakeLists.txt b/example/12_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/example/12_reduce/CMakeLists.txt rename to example/12_reduce/CMakeLists.txt diff --git a/composable_kernel/example/12_reduce/README.md b/example/12_reduce/README.md similarity index 100% rename from composable_kernel/example/12_reduce/README.md rename to example/12_reduce/README.md diff --git a/composable_kernel/example/12_reduce/reduce_blockwise.cpp b/example/12_reduce/reduce_blockwise.cpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_blockwise.cpp rename to example/12_reduce/reduce_blockwise.cpp diff --git a/composable_kernel/example/12_reduce/reduce_blockwise_impl.hpp b/example/12_reduce/reduce_blockwise_impl.hpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_blockwise_impl.hpp rename to example/12_reduce/reduce_blockwise_impl.hpp diff --git a/composable_kernel/example/12_reduce/reduce_blockwise_two_call.cpp b/example/12_reduce/reduce_blockwise_two_call.cpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_blockwise_two_call.cpp rename to example/12_reduce/reduce_blockwise_two_call.cpp diff --git a/composable_kernel/example/12_reduce/reduce_example_common.hpp b/example/12_reduce/reduce_example_common.hpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_example_common.hpp rename to example/12_reduce/reduce_example_common.hpp diff --git a/composable_kernel/example/12_reduce/reduce_multiblock_atomic_add.cpp b/example/12_reduce/reduce_multiblock_atomic_add.cpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_multiblock_atomic_add.cpp rename to example/12_reduce/reduce_multiblock_atomic_add.cpp diff --git a/composable_kernel/example/12_reduce/reduce_multiblock_atomic_add_impl.hpp b/example/12_reduce/reduce_multiblock_atomic_add_impl.hpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_multiblock_atomic_add_impl.hpp rename to example/12_reduce/reduce_multiblock_atomic_add_impl.hpp diff --git a/composable_kernel/example/13_pool2d_fwd/CMakeLists.txt b/example/13_pool2d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/13_pool2d_fwd/CMakeLists.txt rename to example/13_pool2d_fwd/CMakeLists.txt diff --git a/composable_kernel/example/13_pool2d_fwd/README.md b/example/13_pool2d_fwd/README.md similarity index 100% rename from composable_kernel/example/13_pool2d_fwd/README.md rename to example/13_pool2d_fwd/README.md diff --git a/composable_kernel/example/13_pool2d_fwd/pool2d_fwd_common.hpp b/example/13_pool2d_fwd/pool2d_fwd_common.hpp similarity index 100% rename from composable_kernel/example/13_pool2d_fwd/pool2d_fwd_common.hpp rename to example/13_pool2d_fwd/pool2d_fwd_common.hpp diff --git a/composable_kernel/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp b/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp similarity index 100% rename from composable_kernel/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp rename to example/13_pool2d_fwd/pool2d_fwd_fp16.cpp diff --git a/composable_kernel/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp b/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp similarity index 100% rename from composable_kernel/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp rename to example/13_pool2d_fwd/pool2d_fwd_fp32.cpp diff --git a/composable_kernel/example/14_gemm_quantization/CMakeLists.txt b/example/14_gemm_quantization/CMakeLists.txt similarity index 100% rename from composable_kernel/example/14_gemm_quantization/CMakeLists.txt rename to example/14_gemm_quantization/CMakeLists.txt diff --git a/composable_kernel/example/14_gemm_quantization/gemm_dl_quantization_int8.cpp b/example/14_gemm_quantization/gemm_dl_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/14_gemm_quantization/gemm_dl_quantization_int8.cpp rename to example/14_gemm_quantization/gemm_dl_quantization_int8.cpp diff --git a/composable_kernel/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp b/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp rename to example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp diff --git a/composable_kernel/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp b/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp rename to example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp diff --git a/composable_kernel/example/15_grouped_gemm/CMakeLists.txt b/example/15_grouped_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/15_grouped_gemm/CMakeLists.txt rename to example/15_grouped_gemm/CMakeLists.txt diff --git a/composable_kernel/example/15_grouped_gemm/README.md b/example/15_grouped_gemm/README.md similarity index 100% rename from composable_kernel/example/15_grouped_gemm/README.md rename to example/15_grouped_gemm/README.md diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp rename to example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/run_grouped_gemm_example.inc b/example/15_grouped_gemm/run_grouped_gemm_example.inc similarity index 100% rename from composable_kernel/example/15_grouped_gemm/run_grouped_gemm_example.inc rename to example/15_grouped_gemm/run_grouped_gemm_example.inc diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/CMakeLists.txt b/example/16_gemm_multi_d_multi_reduces/CMakeLists.txt similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/CMakeLists.txt rename to example/16_gemm_multi_d_multi_reduces/CMakeLists.txt diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp b/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp rename to example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp diff --git a/composable_kernel/example/17_convnd_bwd_data/CMakeLists.txt b/example/17_convnd_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/example/17_convnd_bwd_data/CMakeLists.txt rename to example/17_convnd_bwd_data/CMakeLists.txt diff --git a/composable_kernel/example/17_convnd_bwd_data/README.md b/example/17_convnd_bwd_data/README.md similarity index 100% rename from composable_kernel/example/17_convnd_bwd_data/README.md rename to example/17_convnd_bwd_data/README.md diff --git a/composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp b/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp similarity index 100% rename from composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp rename to example/17_convnd_bwd_data/convnd_bwd_data_common.hpp diff --git a/composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp b/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp rename to example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp diff --git a/composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp b/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp rename to example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp diff --git a/composable_kernel/example/18_batched_gemm_reduce/CMakeLists.txt b/example/18_batched_gemm_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/example/18_batched_gemm_reduce/CMakeLists.txt rename to example/18_batched_gemm_reduce/CMakeLists.txt diff --git a/composable_kernel/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp b/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp rename to example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp diff --git a/composable_kernel/example/19_binary_elementwise/CMakeLists.txt b/example/19_binary_elementwise/CMakeLists.txt similarity index 100% rename from composable_kernel/example/19_binary_elementwise/CMakeLists.txt rename to example/19_binary_elementwise/CMakeLists.txt diff --git a/composable_kernel/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp b/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp similarity index 100% rename from composable_kernel/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp rename to example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp diff --git a/composable_kernel/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp b/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp similarity index 100% rename from composable_kernel/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp rename to example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp diff --git a/composable_kernel/example/19_binary_elementwise/elementwise_add_1d.cpp b/example/19_binary_elementwise/elementwise_add_1d.cpp similarity index 100% rename from composable_kernel/example/19_binary_elementwise/elementwise_add_1d.cpp rename to example/19_binary_elementwise/elementwise_add_1d.cpp diff --git a/composable_kernel/example/19_binary_elementwise/elementwise_add_4d.cpp b/example/19_binary_elementwise/elementwise_add_4d.cpp similarity index 100% rename from composable_kernel/example/19_binary_elementwise/elementwise_add_4d.cpp rename to example/19_binary_elementwise/elementwise_add_4d.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/CMakeLists.txt b/example/20_grouped_conv_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/CMakeLists.txt rename to example/20_grouped_conv_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/common.hpp b/example/20_grouped_conv_bwd_weight/common.hpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/common.hpp rename to example/20_grouped_conv_bwd_weight/common.hpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp rename to example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp rename to example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp rename to example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp rename to example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp rename to example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc b/example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc rename to example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc diff --git a/composable_kernel/example/21_gemm_layernorm/CMakeLists.txt b/example/21_gemm_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/21_gemm_layernorm/CMakeLists.txt rename to example/21_gemm_layernorm/CMakeLists.txt diff --git a/composable_kernel/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp similarity index 100% rename from composable_kernel/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp rename to example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp diff --git a/composable_kernel/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp similarity index 100% rename from composable_kernel/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp rename to example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp diff --git a/composable_kernel/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp b/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp similarity index 100% rename from composable_kernel/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp rename to example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp diff --git a/composable_kernel/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp b/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp similarity index 100% rename from composable_kernel/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp rename to example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp diff --git a/composable_kernel/example/22_cgemm/CMakeLists.txt b/example/22_cgemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/22_cgemm/CMakeLists.txt rename to example/22_cgemm/CMakeLists.txt diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_bf16.cpp b/example/22_cgemm/cgemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_bf16.cpp rename to example/22_cgemm/cgemm_xdl_bf16.cpp diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_common.hpp b/example/22_cgemm/cgemm_xdl_common.hpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_common.hpp rename to example/22_cgemm/cgemm_xdl_common.hpp diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_fp16.cpp b/example/22_cgemm/cgemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_fp16.cpp rename to example/22_cgemm/cgemm_xdl_fp16.cpp diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_fp32.cpp b/example/22_cgemm/cgemm_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_fp32.cpp rename to example/22_cgemm/cgemm_xdl_fp32.cpp diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_int4.cpp b/example/22_cgemm/cgemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_int4.cpp rename to example/22_cgemm/cgemm_xdl_int4.cpp diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_int8.cpp b/example/22_cgemm/cgemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_int8.cpp rename to example/22_cgemm/cgemm_xdl_int8.cpp diff --git a/composable_kernel/example/23_softmax/CMakeLists.txt b/example/23_softmax/CMakeLists.txt similarity index 100% rename from composable_kernel/example/23_softmax/CMakeLists.txt rename to example/23_softmax/CMakeLists.txt diff --git a/composable_kernel/example/23_softmax/README.md b/example/23_softmax/README.md similarity index 100% rename from composable_kernel/example/23_softmax/README.md rename to example/23_softmax/README.md diff --git a/composable_kernel/example/23_softmax/softmax_blockwise.cpp b/example/23_softmax/softmax_blockwise.cpp similarity index 100% rename from composable_kernel/example/23_softmax/softmax_blockwise.cpp rename to example/23_softmax/softmax_blockwise.cpp diff --git a/composable_kernel/example/24_batched_gemm/CMakeLists.txt b/example/24_batched_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/24_batched_gemm/CMakeLists.txt rename to example/24_batched_gemm/CMakeLists.txt diff --git a/composable_kernel/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp b/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp rename to example/24_batched_gemm/batched_gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp rename to example/24_batched_gemm/batched_gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp rename to example/24_batched_gemm/batched_gemm_xdl_fp32.cpp diff --git a/composable_kernel/example/24_batched_gemm/batched_gemm_xdl_int4.cpp b/example/24_batched_gemm/batched_gemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/24_batched_gemm/batched_gemm_xdl_int4.cpp rename to example/24_batched_gemm/batched_gemm_xdl_int4.cpp diff --git a/composable_kernel/example/24_batched_gemm/batched_gemm_xdl_int8.cpp b/example/24_batched_gemm/batched_gemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/24_batched_gemm/batched_gemm_xdl_int8.cpp rename to example/24_batched_gemm/batched_gemm_xdl_int8.cpp diff --git a/composable_kernel/example/24_batched_gemm/run_batched_gemm_example.inc b/example/24_batched_gemm/run_batched_gemm_example.inc similarity index 100% rename from composable_kernel/example/24_batched_gemm/run_batched_gemm_example.inc rename to example/24_batched_gemm/run_batched_gemm_example.inc diff --git a/composable_kernel/example/25_gemm_bias_e_permute/CMakeLists.txt b/example/25_gemm_bias_e_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/25_gemm_bias_e_permute/CMakeLists.txt rename to example/25_gemm_bias_e_permute/CMakeLists.txt diff --git a/composable_kernel/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp rename to example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp diff --git a/composable_kernel/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp rename to example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp diff --git a/composable_kernel/example/26_contraction/CMakeLists.txt b/example/26_contraction/CMakeLists.txt similarity index 100% rename from composable_kernel/example/26_contraction/CMakeLists.txt rename to example/26_contraction/CMakeLists.txt diff --git a/composable_kernel/example/26_contraction/README.md b/example/26_contraction/README.md similarity index 100% rename from composable_kernel/example/26_contraction/README.md rename to example/26_contraction/README.md diff --git a/composable_kernel/example/26_contraction/contraction_bilinear_xdl_fp32.cpp b/example/26_contraction/contraction_bilinear_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/26_contraction/contraction_bilinear_xdl_fp32.cpp rename to example/26_contraction/contraction_bilinear_xdl_fp32.cpp diff --git a/composable_kernel/example/26_contraction/contraction_bilinear_xdl_fp64.cpp b/example/26_contraction/contraction_bilinear_xdl_fp64.cpp similarity index 100% rename from composable_kernel/example/26_contraction/contraction_bilinear_xdl_fp64.cpp rename to example/26_contraction/contraction_bilinear_xdl_fp64.cpp diff --git a/composable_kernel/example/26_contraction/contraction_scale_xdl_fp32.cpp b/example/26_contraction/contraction_scale_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/26_contraction/contraction_scale_xdl_fp32.cpp rename to example/26_contraction/contraction_scale_xdl_fp32.cpp diff --git a/composable_kernel/example/26_contraction/contraction_scale_xdl_fp64.cpp b/example/26_contraction/contraction_scale_xdl_fp64.cpp similarity index 100% rename from composable_kernel/example/26_contraction/contraction_scale_xdl_fp64.cpp rename to example/26_contraction/contraction_scale_xdl_fp64.cpp diff --git a/composable_kernel/example/27_layernorm/CMakeLists.txt b/example/27_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/27_layernorm/CMakeLists.txt rename to example/27_layernorm/CMakeLists.txt diff --git a/composable_kernel/example/27_layernorm/common.hpp b/example/27_layernorm/common.hpp similarity index 100% rename from composable_kernel/example/27_layernorm/common.hpp rename to example/27_layernorm/common.hpp diff --git a/composable_kernel/example/27_layernorm/layernorm_fp16.cpp b/example/27_layernorm/layernorm_fp16.cpp similarity index 100% rename from composable_kernel/example/27_layernorm/layernorm_fp16.cpp rename to example/27_layernorm/layernorm_fp16.cpp diff --git a/composable_kernel/example/27_layernorm/layernorm_splitk_fp16.cpp b/example/27_layernorm/layernorm_splitk_fp16.cpp similarity index 100% rename from composable_kernel/example/27_layernorm/layernorm_splitk_fp16.cpp rename to example/27_layernorm/layernorm_splitk_fp16.cpp diff --git a/composable_kernel/example/27_layernorm/run_layernorm_example.inc b/example/27_layernorm/run_layernorm_example.inc similarity index 100% rename from composable_kernel/example/27_layernorm/run_layernorm_example.inc rename to example/27_layernorm/run_layernorm_example.inc diff --git a/composable_kernel/example/28_grouped_gemm_bias_e_permute/CMakeLists.txt b/example/28_grouped_gemm_bias_e_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/28_grouped_gemm_bias_e_permute/CMakeLists.txt rename to example/28_grouped_gemm_bias_e_permute/CMakeLists.txt diff --git a/composable_kernel/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp b/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp rename to example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/29_batched_gemm_bias_e_permute/CMakeLists.txt b/example/29_batched_gemm_bias_e_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/29_batched_gemm_bias_e_permute/CMakeLists.txt rename to example/29_batched_gemm_bias_e_permute/CMakeLists.txt diff --git a/composable_kernel/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp rename to example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp diff --git a/composable_kernel/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp rename to example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt b/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt rename to example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/README.md b/example/30_grouped_conv_fwd_multiple_d/README.md similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/README.md rename to example/30_grouped_conv_fwd_multiple_d/README.md diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/common.hpp b/example/30_grouped_conv_fwd_multiple_d/common.hpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/common.hpp rename to example/30_grouped_conv_fwd_multiple_d/common.hpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp b/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp rename to example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc rename to example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc rename to example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc rename to example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc diff --git a/composable_kernel/example/31_batched_gemm_gemm/CMakeLists.txt b/example/31_batched_gemm_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/CMakeLists.txt rename to example/31_batched_gemm_gemm/CMakeLists.txt diff --git a/composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp rename to example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp rename to example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp rename to example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp diff --git a/composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp rename to example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp diff --git a/composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp rename to example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp diff --git a/composable_kernel/example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc b/example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc rename to example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt b/example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt rename to example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc b/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc rename to example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc b/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc rename to example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc b/example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc rename to example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc diff --git a/composable_kernel/example/33_multiple_reduce/CMakeLists.txt b/example/33_multiple_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/example/33_multiple_reduce/CMakeLists.txt rename to example/33_multiple_reduce/CMakeLists.txt diff --git a/composable_kernel/example/33_multiple_reduce/README.md b/example/33_multiple_reduce/README.md similarity index 100% rename from composable_kernel/example/33_multiple_reduce/README.md rename to example/33_multiple_reduce/README.md diff --git a/composable_kernel/example/33_multiple_reduce/dual_reduce_common.hpp b/example/33_multiple_reduce/dual_reduce_common.hpp similarity index 100% rename from composable_kernel/example/33_multiple_reduce/dual_reduce_common.hpp rename to example/33_multiple_reduce/dual_reduce_common.hpp diff --git a/composable_kernel/example/33_multiple_reduce/dual_reduce_multiblock.cpp b/example/33_multiple_reduce/dual_reduce_multiblock.cpp similarity index 100% rename from composable_kernel/example/33_multiple_reduce/dual_reduce_multiblock.cpp rename to example/33_multiple_reduce/dual_reduce_multiblock.cpp diff --git a/composable_kernel/example/33_multiple_reduce/dual_reduce_threadwise.cpp b/example/33_multiple_reduce/dual_reduce_threadwise.cpp similarity index 100% rename from composable_kernel/example/33_multiple_reduce/dual_reduce_threadwise.cpp rename to example/33_multiple_reduce/dual_reduce_threadwise.cpp diff --git a/composable_kernel/example/34_batchnorm/CMakeLists.txt b/example/34_batchnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/34_batchnorm/CMakeLists.txt rename to example/34_batchnorm/CMakeLists.txt diff --git a/composable_kernel/example/34_batchnorm/README.md b/example/34_batchnorm/README.md similarity index 100% rename from composable_kernel/example/34_batchnorm/README.md rename to example/34_batchnorm/README.md diff --git a/composable_kernel/example/34_batchnorm/batchnorm_backward_nhwc.cpp b/example/34_batchnorm/batchnorm_backward_nhwc.cpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_backward_nhwc.cpp rename to example/34_batchnorm/batchnorm_backward_nhwc.cpp diff --git a/composable_kernel/example/34_batchnorm/batchnorm_common.hpp b/example/34_batchnorm/batchnorm_common.hpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_common.hpp rename to example/34_batchnorm/batchnorm_common.hpp diff --git a/composable_kernel/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp b/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp rename to example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp diff --git a/composable_kernel/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp b/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp rename to example/34_batchnorm/batchnorm_forward_training_nhwc.cpp diff --git a/composable_kernel/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp b/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp rename to example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp diff --git a/composable_kernel/example/34_batchnorm/batchnorm_infer_impl.hpp b/example/34_batchnorm/batchnorm_infer_impl.hpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_infer_impl.hpp rename to example/34_batchnorm/batchnorm_infer_impl.hpp diff --git a/composable_kernel/example/35_splitK_gemm/CMakeLists.txt b/example/35_splitK_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/35_splitK_gemm/CMakeLists.txt rename to example/35_splitK_gemm/CMakeLists.txt diff --git a/composable_kernel/example/35_splitK_gemm/run_splitK_gemm_example.inc b/example/35_splitK_gemm/run_splitK_gemm_example.inc similarity index 100% rename from composable_kernel/example/35_splitK_gemm/run_splitK_gemm_example.inc rename to example/35_splitK_gemm/run_splitK_gemm_example.inc diff --git a/composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp rename to example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp rename to example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp rename to example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp diff --git a/composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp rename to example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp diff --git a/composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp rename to example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp diff --git a/composable_kernel/example/36_sparse_embedding/CMakeLists.txt b/example/36_sparse_embedding/CMakeLists.txt similarity index 100% rename from composable_kernel/example/36_sparse_embedding/CMakeLists.txt rename to example/36_sparse_embedding/CMakeLists.txt diff --git a/composable_kernel/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp b/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp similarity index 100% rename from composable_kernel/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp rename to example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp diff --git a/composable_kernel/example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt b/example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt similarity index 100% rename from composable_kernel/example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt rename to example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt diff --git a/composable_kernel/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp b/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp rename to example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt b/example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt rename to example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/common.hpp b/example/38_grouped_conv_bwd_data_multiple_d/common.hpp similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/common.hpp rename to example/38_grouped_conv_bwd_data_multiple_d/common.hpp diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp rename to example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp rename to example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp rename to example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc b/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc rename to example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc b/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc rename to example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc diff --git a/composable_kernel/example/39_permute/CMakeLists.txt b/example/39_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/39_permute/CMakeLists.txt rename to example/39_permute/CMakeLists.txt diff --git a/composable_kernel/example/39_permute/common.hpp b/example/39_permute/common.hpp similarity index 100% rename from composable_kernel/example/39_permute/common.hpp rename to example/39_permute/common.hpp diff --git a/composable_kernel/example/39_permute/permute_1xHxW_fp16.cpp b/example/39_permute/permute_1xHxW_fp16.cpp similarity index 100% rename from composable_kernel/example/39_permute/permute_1xHxW_fp16.cpp rename to example/39_permute/permute_1xHxW_fp16.cpp diff --git a/composable_kernel/example/39_permute/permute_HxWx4_fp16.cpp b/example/39_permute/permute_HxWx4_fp16.cpp similarity index 100% rename from composable_kernel/example/39_permute/permute_HxWx4_fp16.cpp rename to example/39_permute/permute_HxWx4_fp16.cpp diff --git a/composable_kernel/example/39_permute/permute_NxHxW_fp16.cpp b/example/39_permute/permute_NxHxW_fp16.cpp similarity index 100% rename from composable_kernel/example/39_permute/permute_NxHxW_fp16.cpp rename to example/39_permute/permute_NxHxW_fp16.cpp diff --git a/composable_kernel/example/39_permute/run_permute_bundle_example.inc b/example/39_permute/run_permute_bundle_example.inc similarity index 100% rename from composable_kernel/example/39_permute/run_permute_bundle_example.inc rename to example/39_permute/run_permute_bundle_example.inc diff --git a/composable_kernel/example/39_permute/run_permute_element_example.inc b/example/39_permute/run_permute_element_example.inc similarity index 100% rename from composable_kernel/example/39_permute/run_permute_element_example.inc rename to example/39_permute/run_permute_element_example.inc diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/CMakeLists.txt b/example/40_conv2d_fwd_quantization/CMakeLists.txt similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/CMakeLists.txt rename to example/40_conv2d_fwd_quantization/CMakeLists.txt diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/common.hpp b/example/40_conv2d_fwd_quantization/common.hpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/common.hpp rename to example/40_conv2d_fwd_quantization/common.hpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc rename to example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc rename to example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc rename to example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc rename to example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/CMakeLists.txt b/example/41_grouped_conv_conv_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/CMakeLists.txt rename to example/41_grouped_conv_conv_fwd/CMakeLists.txt diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp rename to example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp rename to example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp rename to example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp rename to example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp rename to example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc b/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc rename to example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc diff --git a/composable_kernel/example/42_groupnorm/CMakeLists.txt b/example/42_groupnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/42_groupnorm/CMakeLists.txt rename to example/42_groupnorm/CMakeLists.txt diff --git a/composable_kernel/example/42_groupnorm/common.hpp b/example/42_groupnorm/common.hpp similarity index 100% rename from composable_kernel/example/42_groupnorm/common.hpp rename to example/42_groupnorm/common.hpp diff --git a/composable_kernel/example/42_groupnorm/groupnorm_sigmoid_mul_fp16.cpp b/example/42_groupnorm/groupnorm_sigmoid_mul_fp16.cpp similarity index 100% rename from composable_kernel/example/42_groupnorm/groupnorm_sigmoid_mul_fp16.cpp rename to example/42_groupnorm/groupnorm_sigmoid_mul_fp16.cpp diff --git a/composable_kernel/example/42_groupnorm/groupnorm_splitk_fp16.cpp b/example/42_groupnorm/groupnorm_splitk_fp16.cpp similarity index 100% rename from composable_kernel/example/42_groupnorm/groupnorm_splitk_fp16.cpp rename to example/42_groupnorm/groupnorm_splitk_fp16.cpp diff --git a/composable_kernel/example/42_groupnorm/groupnorm_swish_fp16.cpp b/example/42_groupnorm/groupnorm_swish_fp16.cpp similarity index 100% rename from composable_kernel/example/42_groupnorm/groupnorm_swish_fp16.cpp rename to example/42_groupnorm/groupnorm_swish_fp16.cpp diff --git a/composable_kernel/example/42_groupnorm/run_groupnorm_example.inc b/example/42_groupnorm/run_groupnorm_example.inc similarity index 100% rename from composable_kernel/example/42_groupnorm/run_groupnorm_example.inc rename to example/42_groupnorm/run_groupnorm_example.inc diff --git a/composable_kernel/example/43_splitk_gemm_bias_e_permute/CMakeLists.txt b/example/43_splitk_gemm_bias_e_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/43_splitk_gemm_bias_e_permute/CMakeLists.txt rename to example/43_splitk_gemm_bias_e_permute/CMakeLists.txt diff --git a/composable_kernel/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp rename to example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp rename to example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp diff --git a/composable_kernel/example/44_elementwise_permute/CMakeLists.txt b/example/44_elementwise_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/44_elementwise_permute/CMakeLists.txt rename to example/44_elementwise_permute/CMakeLists.txt diff --git a/composable_kernel/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp similarity index 100% rename from composable_kernel/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp rename to example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp diff --git a/composable_kernel/example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp similarity index 100% rename from composable_kernel/example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp rename to example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp diff --git a/composable_kernel/example/45_elementwise_normalization/CMakeLists.txt b/example/45_elementwise_normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/example/45_elementwise_normalization/CMakeLists.txt rename to example/45_elementwise_normalization/CMakeLists.txt diff --git a/composable_kernel/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp b/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp similarity index 100% rename from composable_kernel/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp rename to example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp diff --git a/composable_kernel/example/46_gemm_add_multiply/CMakeLists.txt b/example/46_gemm_add_multiply/CMakeLists.txt similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/CMakeLists.txt rename to example/46_gemm_add_multiply/CMakeLists.txt diff --git a/composable_kernel/example/46_gemm_add_multiply/README.md b/example/46_gemm_add_multiply/README.md similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/README.md rename to example/46_gemm_add_multiply/README.md diff --git a/composable_kernel/example/46_gemm_add_multiply/common.hpp b/example/46_gemm_add_multiply/common.hpp similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/common.hpp rename to example/46_gemm_add_multiply/common.hpp diff --git a/composable_kernel/example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp b/example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp rename to example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp diff --git a/composable_kernel/example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp b/example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp rename to example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp diff --git a/composable_kernel/example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc b/example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc rename to example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc diff --git a/composable_kernel/example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt b/example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt rename to example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt diff --git a/composable_kernel/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp b/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp similarity index 100% rename from composable_kernel/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp rename to example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp diff --git a/composable_kernel/example/48_pool3d_fwd/CMakeLists.txt b/example/48_pool3d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/48_pool3d_fwd/CMakeLists.txt rename to example/48_pool3d_fwd/CMakeLists.txt diff --git a/composable_kernel/example/48_pool3d_fwd/pool3d_fwd_common.hpp b/example/48_pool3d_fwd/pool3d_fwd_common.hpp similarity index 100% rename from composable_kernel/example/48_pool3d_fwd/pool3d_fwd_common.hpp rename to example/48_pool3d_fwd/pool3d_fwd_common.hpp diff --git a/composable_kernel/example/48_pool3d_fwd/pool3d_fwd_fp16.cpp b/example/48_pool3d_fwd/pool3d_fwd_fp16.cpp similarity index 100% rename from composable_kernel/example/48_pool3d_fwd/pool3d_fwd_fp16.cpp rename to example/48_pool3d_fwd/pool3d_fwd_fp16.cpp diff --git a/composable_kernel/example/49_maxpool2d_bwd/CMakeLists.txt b/example/49_maxpool2d_bwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/49_maxpool2d_bwd/CMakeLists.txt rename to example/49_maxpool2d_bwd/CMakeLists.txt diff --git a/composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp similarity index 100% rename from composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp rename to example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp diff --git a/composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp similarity index 100% rename from composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp rename to example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp diff --git a/composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp similarity index 100% rename from composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp rename to example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp diff --git a/composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp similarity index 100% rename from composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp rename to example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp diff --git a/composable_kernel/example/50_put_element/CMakeLists.txt b/example/50_put_element/CMakeLists.txt similarity index 100% rename from composable_kernel/example/50_put_element/CMakeLists.txt rename to example/50_put_element/CMakeLists.txt diff --git a/composable_kernel/example/50_put_element/put_element_fp16.cpp b/example/50_put_element/put_element_fp16.cpp similarity index 100% rename from composable_kernel/example/50_put_element/put_element_fp16.cpp rename to example/50_put_element/put_element_fp16.cpp diff --git a/composable_kernel/example/51_avgpool3d_bwd/CMakeLists.txt b/example/51_avgpool3d_bwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/51_avgpool3d_bwd/CMakeLists.txt rename to example/51_avgpool3d_bwd/CMakeLists.txt diff --git a/composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp similarity index 100% rename from composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp rename to example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp diff --git a/composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp similarity index 100% rename from composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp rename to example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp diff --git a/composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp similarity index 100% rename from composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp rename to example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp diff --git a/composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp similarity index 100% rename from composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp rename to example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp diff --git a/composable_kernel/example/52_im2col_col2im/CMakeLists.txt b/example/52_im2col_col2im/CMakeLists.txt similarity index 100% rename from composable_kernel/example/52_im2col_col2im/CMakeLists.txt rename to example/52_im2col_col2im/CMakeLists.txt diff --git a/composable_kernel/example/52_im2col_col2im/column_to_image_f32.cpp b/example/52_im2col_col2im/column_to_image_f32.cpp similarity index 100% rename from composable_kernel/example/52_im2col_col2im/column_to_image_f32.cpp rename to example/52_im2col_col2im/column_to_image_f32.cpp diff --git a/composable_kernel/example/52_im2col_col2im/common.hpp b/example/52_im2col_col2im/common.hpp similarity index 100% rename from composable_kernel/example/52_im2col_col2im/common.hpp rename to example/52_im2col_col2im/common.hpp diff --git a/composable_kernel/example/52_im2col_col2im/image_to_column_f32.cpp b/example/52_im2col_col2im/image_to_column_f32.cpp similarity index 100% rename from composable_kernel/example/52_im2col_col2im/image_to_column_f32.cpp rename to example/52_im2col_col2im/image_to_column_f32.cpp diff --git a/composable_kernel/example/53_gemv_splitk/CMakeLists.txt b/example/53_gemv_splitk/CMakeLists.txt similarity index 100% rename from composable_kernel/example/53_gemv_splitk/CMakeLists.txt rename to example/53_gemv_splitk/CMakeLists.txt diff --git a/composable_kernel/example/53_gemv_splitk/README.md b/example/53_gemv_splitk/README.md similarity index 100% rename from composable_kernel/example/53_gemv_splitk/README.md rename to example/53_gemv_splitk/README.md diff --git a/composable_kernel/example/53_gemv_splitk/common.hpp b/example/53_gemv_splitk/common.hpp similarity index 100% rename from composable_kernel/example/53_gemv_splitk/common.hpp rename to example/53_gemv_splitk/common.hpp diff --git a/composable_kernel/example/53_gemv_splitk/gemv_splitk_fp16.cpp b/example/53_gemv_splitk/gemv_splitk_fp16.cpp similarity index 100% rename from composable_kernel/example/53_gemv_splitk/gemv_splitk_fp16.cpp rename to example/53_gemv_splitk/gemv_splitk_fp16.cpp diff --git a/composable_kernel/example/53_gemv_splitk/run_gemv_splitk_example.inc b/example/53_gemv_splitk/run_gemv_splitk_example.inc similarity index 100% rename from composable_kernel/example/53_gemv_splitk/run_gemv_splitk_example.inc rename to example/53_gemv_splitk/run_gemv_splitk_example.inc diff --git a/composable_kernel/example/54_tall_and_skinny_gemm_splitk/CMakeLists.txt b/example/54_tall_and_skinny_gemm_splitk/CMakeLists.txt similarity index 100% rename from composable_kernel/example/54_tall_and_skinny_gemm_splitk/CMakeLists.txt rename to example/54_tall_and_skinny_gemm_splitk/CMakeLists.txt diff --git a/composable_kernel/example/54_tall_and_skinny_gemm_splitk/README.md b/example/54_tall_and_skinny_gemm_splitk/README.md similarity index 100% rename from composable_kernel/example/54_tall_and_skinny_gemm_splitk/README.md rename to example/54_tall_and_skinny_gemm_splitk/README.md diff --git a/composable_kernel/example/54_tall_and_skinny_gemm_splitk/common.hpp b/example/54_tall_and_skinny_gemm_splitk/common.hpp similarity index 100% rename from composable_kernel/example/54_tall_and_skinny_gemm_splitk/common.hpp rename to example/54_tall_and_skinny_gemm_splitk/common.hpp diff --git a/composable_kernel/example/54_tall_and_skinny_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc b/example/54_tall_and_skinny_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc similarity index 100% rename from composable_kernel/example/54_tall_and_skinny_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc rename to example/54_tall_and_skinny_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc diff --git a/composable_kernel/example/54_tall_and_skinny_gemm_splitk/tall_and_skinny_gemm_splitk_fp16.cpp b/example/54_tall_and_skinny_gemm_splitk/tall_and_skinny_gemm_splitk_fp16.cpp similarity index 100% rename from composable_kernel/example/54_tall_and_skinny_gemm_splitk/tall_and_skinny_gemm_splitk_fp16.cpp rename to example/54_tall_and_skinny_gemm_splitk/tall_and_skinny_gemm_splitk_fp16.cpp diff --git a/composable_kernel/example/60_gemm_multi_ABD/CMakeLists.txt b/example/60_gemm_multi_ABD/CMakeLists.txt similarity index 100% rename from composable_kernel/example/60_gemm_multi_ABD/CMakeLists.txt rename to example/60_gemm_multi_ABD/CMakeLists.txt diff --git a/composable_kernel/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp rename to example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp diff --git a/composable_kernel/example/61_contraction_multi_ABD/CMakeLists.txt b/example/61_contraction_multi_ABD/CMakeLists.txt similarity index 100% rename from composable_kernel/example/61_contraction_multi_ABD/CMakeLists.txt rename to example/61_contraction_multi_ABD/CMakeLists.txt diff --git a/composable_kernel/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp b/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp rename to example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/CMakeLists.txt b/example/62_conv_fwd_activ/CMakeLists.txt similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/CMakeLists.txt rename to example/62_conv_fwd_activ/CMakeLists.txt diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_activ_common.hpp b/example/62_conv_fwd_activ/convnd_fwd_activ_common.hpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_activ_common.hpp rename to example/62_conv_fwd_activ/convnd_fwd_activ_common.hpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_abs_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_abs_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_abs_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_abs_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_clippedrelu_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_clippedrelu_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_clippedrelu_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_clippedrelu_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_elu_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_elu_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_elu_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_elu_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_leakyrelu_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_leakyrelu_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_leakyrelu_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_leakyrelu_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_pow_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_pow_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_pow_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_pow_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/run_convnd_fwd_activ_example.inc b/example/62_conv_fwd_activ/run_convnd_fwd_activ_example.inc similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/run_convnd_fwd_activ_example.inc rename to example/62_conv_fwd_activ/run_convnd_fwd_activ_example.inc diff --git a/composable_kernel/example/CMakeLists.txt b/example/CMakeLists.txt similarity index 100% rename from composable_kernel/example/CMakeLists.txt rename to example/CMakeLists.txt diff --git a/composable_kernel/include/ck/ck.hpp b/include/ck/ck.hpp similarity index 100% rename from composable_kernel/include/ck/ck.hpp rename to include/ck/ck.hpp diff --git a/composable_kernel/include/ck/config.h.in b/include/ck/config.h.in similarity index 100% rename from composable_kernel/include/ck/config.h.in rename to include/ck/config.h.in diff --git a/composable_kernel/include/ck/host_utility/device_prop.hpp b/include/ck/host_utility/device_prop.hpp similarity index 100% rename from composable_kernel/include/ck/host_utility/device_prop.hpp rename to include/ck/host_utility/device_prop.hpp diff --git a/composable_kernel/include/ck/host_utility/hip_check_error.hpp b/include/ck/host_utility/hip_check_error.hpp similarity index 100% rename from composable_kernel/include/ck/host_utility/hip_check_error.hpp rename to include/ck/host_utility/hip_check_error.hpp diff --git a/composable_kernel/include/ck/host_utility/io.hpp b/include/ck/host_utility/io.hpp similarity index 100% rename from composable_kernel/include/ck/host_utility/io.hpp rename to include/ck/host_utility/io.hpp diff --git a/composable_kernel/include/ck/host_utility/kernel_launch.hpp b/include/ck/host_utility/kernel_launch.hpp similarity index 100% rename from composable_kernel/include/ck/host_utility/kernel_launch.hpp rename to include/ck/host_utility/kernel_launch.hpp diff --git a/composable_kernel/include/ck/host_utility/stream_utility.hpp b/include/ck/host_utility/stream_utility.hpp similarity index 100% rename from composable_kernel/include/ck/host_utility/stream_utility.hpp rename to include/ck/host_utility/stream_utility.hpp diff --git a/composable_kernel/include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp b/include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp similarity index 100% rename from composable_kernel/include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp rename to include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp diff --git a/composable_kernel/include/ck/stream_config.hpp b/include/ck/stream_config.hpp similarity index 100% rename from composable_kernel/include/ck/stream_config.hpp rename to include/ck/stream_config.hpp diff --git a/composable_kernel/include/ck/tensor/static_tensor.hpp b/include/ck/tensor/static_tensor.hpp similarity index 100% rename from composable_kernel/include/ck/tensor/static_tensor.hpp rename to include/ck/tensor/static_tensor.hpp diff --git a/composable_kernel/include/ck/tensor_description/cluster_descriptor.hpp b/include/ck/tensor_description/cluster_descriptor.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/cluster_descriptor.hpp rename to include/ck/tensor_description/cluster_descriptor.hpp diff --git a/composable_kernel/include/ck/tensor_description/multi_index_transform.hpp b/include/ck/tensor_description/multi_index_transform.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/multi_index_transform.hpp rename to include/ck/tensor_description/multi_index_transform.hpp diff --git a/composable_kernel/include/ck/tensor_description/multi_index_transform_helper.hpp b/include/ck/tensor_description/multi_index_transform_helper.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/multi_index_transform_helper.hpp rename to include/ck/tensor_description/multi_index_transform_helper.hpp diff --git a/composable_kernel/include/ck/tensor_description/tensor_adaptor.hpp b/include/ck/tensor_description/tensor_adaptor.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/tensor_adaptor.hpp rename to include/ck/tensor_description/tensor_adaptor.hpp diff --git a/composable_kernel/include/ck/tensor_description/tensor_descriptor.hpp b/include/ck/tensor_description/tensor_descriptor.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/tensor_descriptor.hpp rename to include/ck/tensor_description/tensor_descriptor.hpp diff --git a/composable_kernel/include/ck/tensor_description/tensor_descriptor_helper.hpp b/include/ck/tensor_description/tensor_descriptor_helper.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/tensor_descriptor_helper.hpp rename to include/ck/tensor_description/tensor_descriptor_helper.hpp diff --git a/composable_kernel/include/ck/tensor_description/tensor_space_filling_curve.hpp b/include/ck/tensor_description/tensor_space_filling_curve.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/tensor_space_filling_curve.hpp rename to include/ck/tensor_description/tensor_space_filling_curve.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp b/include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp b/include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp b/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_welford.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp b/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp rename to include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp b/include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp rename to include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp rename to include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp rename to include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp rename to include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp b/include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp rename to include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_base.hpp b/include/ck/tensor_operation/gpu/device/device_base.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_base.hpp rename to include/ck/tensor_operation/gpu/device/device_base.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp rename to include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp rename to include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp rename to include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_cgemm.hpp b/include/ck/tensor_operation/gpu/device/device_cgemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_cgemm.hpp rename to include/ck/tensor_operation/gpu/device/device_cgemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp b/include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp rename to include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp b/include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp rename to include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp rename to include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp rename to include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp rename to include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp b/include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp rename to include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_elementwise.hpp b/include/ck/tensor_operation/gpu/device/device_elementwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_elementwise.hpp rename to include/ck/tensor_operation/gpu/device/device_elementwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp b/include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp rename to include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp b/include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp rename to include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp rename to include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_normalization.hpp b/include/ck/tensor_operation/gpu/device/device_normalization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_normalization.hpp rename to include/ck/tensor_operation/gpu/device/device_normalization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_permute.hpp b/include/ck/tensor_operation/gpu/device/device_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_permute.hpp rename to include/ck/tensor_operation/gpu/device/device_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp rename to include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_put_element.hpp b/include/ck/tensor_operation/gpu/device/device_put_element.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_put_element.hpp rename to include/ck/tensor_operation/gpu/device/device_put_element.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_reduce.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_reduce.hpp rename to include/ck/tensor_operation/gpu/device/device_reduce.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_softmax.hpp b/include/ck/tensor_operation/gpu/device/device_softmax.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_softmax.hpp rename to include/ck/tensor_operation/gpu/device/device_softmax.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_tall_and_skinny_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_tall_and_skinny_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_tall_and_skinny_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_tall_and_skinny_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/gemm_specialization.hpp b/include/ck/tensor_operation/gpu/device/gemm_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/gemm_specialization.hpp rename to include/ck/tensor_operation/gpu/device/gemm_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp b/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp b/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp b/include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/masking_specialization.hpp b/include/ck/tensor_operation/gpu/device/masking_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/masking_specialization.hpp rename to include/ck/tensor_operation/gpu/device/masking_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/matrix_padder.hpp b/include/ck/tensor_operation/gpu/device/matrix_padder.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/matrix_padder.hpp rename to include/ck/tensor_operation/gpu/device/matrix_padder.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp b/include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp rename to include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/tensor_layout.hpp b/include/ck/tensor_operation/gpu/device/tensor_layout.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/tensor_layout.hpp rename to include/ck/tensor_operation/gpu/device/tensor_layout.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/tensor_specialization.hpp b/include/ck/tensor_operation/gpu/device/tensor_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/tensor_specialization.hpp rename to include/ck/tensor_operation/gpu/device/tensor_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/welford_helper.hpp b/include/ck/tensor_operation/gpu/device/welford_helper.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/welford_helper.hpp rename to include/ck/tensor_operation/gpu/device/welford_helper.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp rename to include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp rename to include/ck/tensor_operation/gpu/element/element_wise_operation.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/element/quantization_operation.hpp b/include/ck/tensor_operation/gpu/element/quantization_operation.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/element/quantization_operation.hpp rename to include/ck/tensor_operation/gpu/element/quantization_operation.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp rename to include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp rename to include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp rename to include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp rename to include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp rename to include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp rename to include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp b/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp rename to include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp b/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp rename to include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp rename to include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp rename to include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp rename to include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp rename to include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp rename to include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp b/include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp rename to include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp b/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp rename to include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp b/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp rename to include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp b/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp rename to include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp b/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp rename to include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp b/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp rename to include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp b/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp rename to include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp diff --git a/composable_kernel/include/ck/utility/amd_address_space.hpp b/include/ck/utility/amd_address_space.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_address_space.hpp rename to include/ck/utility/amd_address_space.hpp diff --git a/composable_kernel/include/ck/utility/amd_buffer_addressing.hpp b/include/ck/utility/amd_buffer_addressing.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_buffer_addressing.hpp rename to include/ck/utility/amd_buffer_addressing.hpp diff --git a/composable_kernel/include/ck/utility/amd_gemm_dpp.hpp b/include/ck/utility/amd_gemm_dpp.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_gemm_dpp.hpp rename to include/ck/utility/amd_gemm_dpp.hpp diff --git a/composable_kernel/include/ck/utility/amd_inline_asm.hpp b/include/ck/utility/amd_inline_asm.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_inline_asm.hpp rename to include/ck/utility/amd_inline_asm.hpp diff --git a/composable_kernel/include/ck/utility/amd_wave_read_first_lane.hpp b/include/ck/utility/amd_wave_read_first_lane.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_wave_read_first_lane.hpp rename to include/ck/utility/amd_wave_read_first_lane.hpp diff --git a/composable_kernel/include/ck/utility/amd_wmma.hpp b/include/ck/utility/amd_wmma.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_wmma.hpp rename to include/ck/utility/amd_wmma.hpp diff --git a/composable_kernel/include/ck/utility/amd_xdlops.hpp b/include/ck/utility/amd_xdlops.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_xdlops.hpp rename to include/ck/utility/amd_xdlops.hpp diff --git a/composable_kernel/include/ck/utility/array.hpp b/include/ck/utility/array.hpp similarity index 100% rename from composable_kernel/include/ck/utility/array.hpp rename to include/ck/utility/array.hpp diff --git a/composable_kernel/include/ck/utility/array_multi_index.hpp b/include/ck/utility/array_multi_index.hpp similarity index 100% rename from composable_kernel/include/ck/utility/array_multi_index.hpp rename to include/ck/utility/array_multi_index.hpp diff --git a/composable_kernel/include/ck/utility/c_style_pointer_cast.hpp b/include/ck/utility/c_style_pointer_cast.hpp similarity index 100% rename from composable_kernel/include/ck/utility/c_style_pointer_cast.hpp rename to include/ck/utility/c_style_pointer_cast.hpp diff --git a/composable_kernel/include/ck/utility/common_header.hpp b/include/ck/utility/common_header.hpp similarity index 100% rename from composable_kernel/include/ck/utility/common_header.hpp rename to include/ck/utility/common_header.hpp diff --git a/composable_kernel/include/ck/utility/container_element_picker.hpp b/include/ck/utility/container_element_picker.hpp similarity index 100% rename from composable_kernel/include/ck/utility/container_element_picker.hpp rename to include/ck/utility/container_element_picker.hpp diff --git a/composable_kernel/include/ck/utility/container_helper.hpp b/include/ck/utility/container_helper.hpp similarity index 100% rename from composable_kernel/include/ck/utility/container_helper.hpp rename to include/ck/utility/container_helper.hpp diff --git a/composable_kernel/include/ck/utility/data_type.hpp b/include/ck/utility/data_type.hpp similarity index 100% rename from composable_kernel/include/ck/utility/data_type.hpp rename to include/ck/utility/data_type.hpp diff --git a/composable_kernel/include/ck/utility/debug.hpp b/include/ck/utility/debug.hpp similarity index 100% rename from composable_kernel/include/ck/utility/debug.hpp rename to include/ck/utility/debug.hpp diff --git a/composable_kernel/include/ck/utility/dynamic_buffer.hpp b/include/ck/utility/dynamic_buffer.hpp similarity index 100% rename from composable_kernel/include/ck/utility/dynamic_buffer.hpp rename to include/ck/utility/dynamic_buffer.hpp diff --git a/composable_kernel/include/ck/utility/enable_if.hpp b/include/ck/utility/enable_if.hpp similarity index 100% rename from composable_kernel/include/ck/utility/enable_if.hpp rename to include/ck/utility/enable_if.hpp diff --git a/composable_kernel/include/ck/utility/f8_utils.hpp b/include/ck/utility/f8_utils.hpp similarity index 100% rename from composable_kernel/include/ck/utility/f8_utils.hpp rename to include/ck/utility/f8_utils.hpp diff --git a/composable_kernel/include/ck/utility/functional.hpp b/include/ck/utility/functional.hpp similarity index 100% rename from composable_kernel/include/ck/utility/functional.hpp rename to include/ck/utility/functional.hpp diff --git a/composable_kernel/include/ck/utility/functional2.hpp b/include/ck/utility/functional2.hpp similarity index 100% rename from composable_kernel/include/ck/utility/functional2.hpp rename to include/ck/utility/functional2.hpp diff --git a/composable_kernel/include/ck/utility/functional3.hpp b/include/ck/utility/functional3.hpp similarity index 100% rename from composable_kernel/include/ck/utility/functional3.hpp rename to include/ck/utility/functional3.hpp diff --git a/composable_kernel/include/ck/utility/functional4.hpp b/include/ck/utility/functional4.hpp similarity index 100% rename from composable_kernel/include/ck/utility/functional4.hpp rename to include/ck/utility/functional4.hpp diff --git a/composable_kernel/include/ck/utility/generic_memory_space_atomic.hpp b/include/ck/utility/generic_memory_space_atomic.hpp similarity index 100% rename from composable_kernel/include/ck/utility/generic_memory_space_atomic.hpp rename to include/ck/utility/generic_memory_space_atomic.hpp diff --git a/composable_kernel/include/ck/utility/get_id.hpp b/include/ck/utility/get_id.hpp similarity index 100% rename from composable_kernel/include/ck/utility/get_id.hpp rename to include/ck/utility/get_id.hpp diff --git a/composable_kernel/include/ck/utility/get_shift.hpp b/include/ck/utility/get_shift.hpp similarity index 100% rename from composable_kernel/include/ck/utility/get_shift.hpp rename to include/ck/utility/get_shift.hpp diff --git a/composable_kernel/include/ck/utility/ignore.hpp b/include/ck/utility/ignore.hpp similarity index 100% rename from composable_kernel/include/ck/utility/ignore.hpp rename to include/ck/utility/ignore.hpp diff --git a/composable_kernel/include/ck/utility/inner_product.hpp b/include/ck/utility/inner_product.hpp similarity index 100% rename from composable_kernel/include/ck/utility/inner_product.hpp rename to include/ck/utility/inner_product.hpp diff --git a/composable_kernel/include/ck/utility/inner_product_dpp8.hpp b/include/ck/utility/inner_product_dpp8.hpp similarity index 100% rename from composable_kernel/include/ck/utility/inner_product_dpp8.hpp rename to include/ck/utility/inner_product_dpp8.hpp diff --git a/composable_kernel/include/ck/utility/integral_constant.hpp b/include/ck/utility/integral_constant.hpp similarity index 100% rename from composable_kernel/include/ck/utility/integral_constant.hpp rename to include/ck/utility/integral_constant.hpp diff --git a/composable_kernel/include/ck/utility/is_detected.hpp b/include/ck/utility/is_detected.hpp similarity index 100% rename from composable_kernel/include/ck/utility/is_detected.hpp rename to include/ck/utility/is_detected.hpp diff --git a/composable_kernel/include/ck/utility/is_known_at_compile_time.hpp b/include/ck/utility/is_known_at_compile_time.hpp similarity index 100% rename from composable_kernel/include/ck/utility/is_known_at_compile_time.hpp rename to include/ck/utility/is_known_at_compile_time.hpp diff --git a/composable_kernel/include/ck/utility/loop_scheduler.hpp b/include/ck/utility/loop_scheduler.hpp similarity index 100% rename from composable_kernel/include/ck/utility/loop_scheduler.hpp rename to include/ck/utility/loop_scheduler.hpp diff --git a/composable_kernel/include/ck/utility/magic_division.hpp b/include/ck/utility/magic_division.hpp similarity index 100% rename from composable_kernel/include/ck/utility/magic_division.hpp rename to include/ck/utility/magic_division.hpp diff --git a/composable_kernel/include/ck/utility/math.hpp b/include/ck/utility/math.hpp similarity index 100% rename from composable_kernel/include/ck/utility/math.hpp rename to include/ck/utility/math.hpp diff --git a/composable_kernel/include/ck/utility/math_v2.hpp b/include/ck/utility/math_v2.hpp similarity index 100% rename from composable_kernel/include/ck/utility/math_v2.hpp rename to include/ck/utility/math_v2.hpp diff --git a/composable_kernel/include/ck/utility/multi_index.hpp b/include/ck/utility/multi_index.hpp similarity index 100% rename from composable_kernel/include/ck/utility/multi_index.hpp rename to include/ck/utility/multi_index.hpp diff --git a/composable_kernel/include/ck/utility/number.hpp b/include/ck/utility/number.hpp similarity index 100% rename from composable_kernel/include/ck/utility/number.hpp rename to include/ck/utility/number.hpp diff --git a/composable_kernel/include/ck/utility/random_gen.hpp b/include/ck/utility/random_gen.hpp similarity index 100% rename from composable_kernel/include/ck/utility/random_gen.hpp rename to include/ck/utility/random_gen.hpp diff --git a/composable_kernel/include/ck/utility/reduction_common.hpp b/include/ck/utility/reduction_common.hpp similarity index 100% rename from composable_kernel/include/ck/utility/reduction_common.hpp rename to include/ck/utility/reduction_common.hpp diff --git a/composable_kernel/include/ck/utility/reduction_enums.hpp b/include/ck/utility/reduction_enums.hpp similarity index 100% rename from composable_kernel/include/ck/utility/reduction_enums.hpp rename to include/ck/utility/reduction_enums.hpp diff --git a/composable_kernel/include/ck/utility/reduction_functions_accumulate.hpp b/include/ck/utility/reduction_functions_accumulate.hpp similarity index 100% rename from composable_kernel/include/ck/utility/reduction_functions_accumulate.hpp rename to include/ck/utility/reduction_functions_accumulate.hpp diff --git a/composable_kernel/include/ck/utility/reduction_operator.hpp b/include/ck/utility/reduction_operator.hpp similarity index 100% rename from composable_kernel/include/ck/utility/reduction_operator.hpp rename to include/ck/utility/reduction_operator.hpp diff --git a/composable_kernel/include/ck/utility/sequence.hpp b/include/ck/utility/sequence.hpp similarity index 100% rename from composable_kernel/include/ck/utility/sequence.hpp rename to include/ck/utility/sequence.hpp diff --git a/composable_kernel/include/ck/utility/sequence_helper.hpp b/include/ck/utility/sequence_helper.hpp similarity index 100% rename from composable_kernel/include/ck/utility/sequence_helper.hpp rename to include/ck/utility/sequence_helper.hpp diff --git a/composable_kernel/include/ck/utility/span.hpp b/include/ck/utility/span.hpp similarity index 100% rename from composable_kernel/include/ck/utility/span.hpp rename to include/ck/utility/span.hpp diff --git a/composable_kernel/include/ck/utility/static_buffer.hpp b/include/ck/utility/static_buffer.hpp similarity index 100% rename from composable_kernel/include/ck/utility/static_buffer.hpp rename to include/ck/utility/static_buffer.hpp diff --git a/composable_kernel/include/ck/utility/statically_indexed_array.hpp b/include/ck/utility/statically_indexed_array.hpp similarity index 100% rename from composable_kernel/include/ck/utility/statically_indexed_array.hpp rename to include/ck/utility/statically_indexed_array.hpp diff --git a/composable_kernel/include/ck/utility/statically_indexed_array_multi_index.hpp b/include/ck/utility/statically_indexed_array_multi_index.hpp similarity index 100% rename from composable_kernel/include/ck/utility/statically_indexed_array_multi_index.hpp rename to include/ck/utility/statically_indexed_array_multi_index.hpp diff --git a/composable_kernel/include/ck/utility/synchronization.hpp b/include/ck/utility/synchronization.hpp similarity index 100% rename from composable_kernel/include/ck/utility/synchronization.hpp rename to include/ck/utility/synchronization.hpp diff --git a/composable_kernel/include/ck/utility/thread_group.hpp b/include/ck/utility/thread_group.hpp similarity index 100% rename from composable_kernel/include/ck/utility/thread_group.hpp rename to include/ck/utility/thread_group.hpp diff --git a/composable_kernel/include/ck/utility/transpose_vectors.hpp b/include/ck/utility/transpose_vectors.hpp similarity index 100% rename from composable_kernel/include/ck/utility/transpose_vectors.hpp rename to include/ck/utility/transpose_vectors.hpp diff --git a/composable_kernel/include/ck/utility/tuple.hpp b/include/ck/utility/tuple.hpp similarity index 100% rename from composable_kernel/include/ck/utility/tuple.hpp rename to include/ck/utility/tuple.hpp diff --git a/composable_kernel/include/ck/utility/tuple_helper.hpp b/include/ck/utility/tuple_helper.hpp similarity index 100% rename from composable_kernel/include/ck/utility/tuple_helper.hpp rename to include/ck/utility/tuple_helper.hpp diff --git a/composable_kernel/include/ck/utility/type.hpp b/include/ck/utility/type.hpp similarity index 100% rename from composable_kernel/include/ck/utility/type.hpp rename to include/ck/utility/type.hpp diff --git a/composable_kernel/include/ck/utility/type_convert.hpp b/include/ck/utility/type_convert.hpp similarity index 100% rename from composable_kernel/include/ck/utility/type_convert.hpp rename to include/ck/utility/type_convert.hpp diff --git a/composable_kernel/include/ck/utility/workgroup_barrier.hpp b/include/ck/utility/workgroup_barrier.hpp similarity index 100% rename from composable_kernel/include/ck/utility/workgroup_barrier.hpp rename to include/ck/utility/workgroup_barrier.hpp diff --git a/composable_kernel/include/ck/utility/workgroup_synchronization.hpp b/include/ck/utility/workgroup_synchronization.hpp similarity index 100% rename from composable_kernel/include/ck/utility/workgroup_synchronization.hpp rename to include/ck/utility/workgroup_synchronization.hpp diff --git a/composable_kernel/include/ck/version.h.in b/include/ck/version.h.in similarity index 100% rename from composable_kernel/include/ck/version.h.in rename to include/ck/version.h.in diff --git a/composable_kernel/library/CMakeLists.txt b/library/CMakeLists.txt similarity index 100% rename from composable_kernel/library/CMakeLists.txt rename to library/CMakeLists.txt diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp b/library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp rename to library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp b/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp rename to library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp b/library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp rename to library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp b/library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemv_splitk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemv_splitk.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemv_splitk.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemv_splitk.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv2d_fwd_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv2d_fwd_wmma_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv2d_fwd_wmma_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv2d_fwd_wmma_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/image_to_column.hpp b/library/include/ck/library/tensor_operation_instance/gpu/image_to_column.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/image_to_column.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/image_to_column.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/normalization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/normalization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/normalization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/normalization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp b/library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk.hpp diff --git a/composable_kernel/library/include/ck/library/utility/algorithm.hpp b/library/include/ck/library/utility/algorithm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/algorithm.hpp rename to library/include/ck/library/utility/algorithm.hpp diff --git a/composable_kernel/library/include/ck/library/utility/check_err.hpp b/library/include/ck/library/utility/check_err.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/check_err.hpp rename to library/include/ck/library/utility/check_err.hpp diff --git a/composable_kernel/library/include/ck/library/utility/conv_common.hpp b/library/include/ck/library/utility/conv_common.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/conv_common.hpp rename to library/include/ck/library/utility/conv_common.hpp diff --git a/composable_kernel/library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp b/library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp rename to library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp diff --git a/composable_kernel/library/include/ck/library/utility/convolution_parameter.hpp b/library/include/ck/library/utility/convolution_parameter.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/convolution_parameter.hpp rename to library/include/ck/library/utility/convolution_parameter.hpp diff --git a/composable_kernel/library/include/ck/library/utility/device_memory.hpp b/library/include/ck/library/utility/device_memory.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/device_memory.hpp rename to library/include/ck/library/utility/device_memory.hpp diff --git a/composable_kernel/library/include/ck/library/utility/fill.hpp b/library/include/ck/library/utility/fill.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/fill.hpp rename to library/include/ck/library/utility/fill.hpp diff --git a/composable_kernel/library/include/ck/library/utility/host_common_util.hpp b/library/include/ck/library/utility/host_common_util.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/host_common_util.hpp rename to library/include/ck/library/utility/host_common_util.hpp diff --git a/composable_kernel/library/include/ck/library/utility/host_gemm.hpp b/library/include/ck/library/utility/host_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/host_gemm.hpp rename to library/include/ck/library/utility/host_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/utility/host_tensor.hpp b/library/include/ck/library/utility/host_tensor.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/host_tensor.hpp rename to library/include/ck/library/utility/host_tensor.hpp diff --git a/composable_kernel/library/include/ck/library/utility/host_tensor_generator.hpp b/library/include/ck/library/utility/host_tensor_generator.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/host_tensor_generator.hpp rename to library/include/ck/library/utility/host_tensor_generator.hpp diff --git a/composable_kernel/library/include/ck/library/utility/iterator.hpp b/library/include/ck/library/utility/iterator.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/iterator.hpp rename to library/include/ck/library/utility/iterator.hpp diff --git a/composable_kernel/library/include/ck/library/utility/literals.hpp b/library/include/ck/library/utility/literals.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/literals.hpp rename to library/include/ck/library/utility/literals.hpp diff --git a/composable_kernel/library/include/ck/library/utility/numeric.hpp b/library/include/ck/library/utility/numeric.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/numeric.hpp rename to library/include/ck/library/utility/numeric.hpp diff --git a/composable_kernel/library/include/ck/library/utility/ranges.hpp b/library/include/ck/library/utility/ranges.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/ranges.hpp rename to library/include/ck/library/utility/ranges.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp rename to library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_1d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_1d_instance.cpp rename to library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_1d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_2d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_2d_instance.cpp rename to library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_2d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_3d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_3d_instance.cpp rename to library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_3d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp b/library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp rename to library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemv_splitk/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemv_splitk/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp rename to library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_2d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_2d_instance.cpp rename to library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_2d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp rename to library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp rename to library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp b/library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp rename to library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/utility/CMakeLists.txt b/library/src/utility/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/utility/CMakeLists.txt rename to library/src/utility/CMakeLists.txt diff --git a/composable_kernel/library/src/utility/convolution_parameter.cpp b/library/src/utility/convolution_parameter.cpp similarity index 100% rename from composable_kernel/library/src/utility/convolution_parameter.cpp rename to library/src/utility/convolution_parameter.cpp diff --git a/composable_kernel/library/src/utility/device_memory.cpp b/library/src/utility/device_memory.cpp similarity index 100% rename from composable_kernel/library/src/utility/device_memory.cpp rename to library/src/utility/device_memory.cpp diff --git a/composable_kernel/library/src/utility/host_tensor.cpp b/library/src/utility/host_tensor.cpp similarity index 100% rename from composable_kernel/library/src/utility/host_tensor.cpp rename to library/src/utility/host_tensor.cpp diff --git a/composable_kernel/profiler/CMakeLists.txt b/profiler/CMakeLists.txt similarity index 100% rename from composable_kernel/profiler/CMakeLists.txt rename to profiler/CMakeLists.txt diff --git a/composable_kernel/profiler/README.md b/profiler/README.md similarity index 100% rename from composable_kernel/profiler/README.md rename to profiler/README.md diff --git a/composable_kernel/profiler/include/profiler/data_type_enum.hpp b/profiler/include/profiler/data_type_enum.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/data_type_enum.hpp rename to profiler/include/profiler/data_type_enum.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp b/profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp rename to profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp b/profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp b/profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp b/profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp b/profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batchnorm_backward_impl.hpp b/profiler/include/profiler/profile_batchnorm_backward_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batchnorm_backward_impl.hpp rename to profiler/include/profiler/profile_batchnorm_backward_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batchnorm_forward_impl.hpp b/profiler/include/profiler/profile_batchnorm_forward_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batchnorm_forward_impl.hpp rename to profiler/include/profiler/profile_batchnorm_forward_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batchnorm_infer_impl.hpp b/profiler/include/profiler/profile_batchnorm_infer_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batchnorm_infer_impl.hpp rename to profiler/include/profiler/profile_batchnorm_infer_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_contraction_impl.hpp b/profiler/include/profiler/profile_contraction_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_contraction_impl.hpp rename to profiler/include/profiler/profile_contraction_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_contraction_utils.hpp b/profiler/include/profiler/profile_contraction_utils.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_contraction_utils.hpp rename to profiler/include/profiler/profile_contraction_utils.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_conv_bwd_data_impl.hpp b/profiler/include/profiler/profile_conv_bwd_data_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_conv_bwd_data_impl.hpp rename to profiler/include/profiler/profile_conv_bwd_data_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp b/profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp rename to profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp b/profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp rename to profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_conv_fwd_impl.hpp b/profiler/include/profiler/profile_conv_fwd_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_conv_fwd_impl.hpp rename to profiler/include/profiler/profile_conv_fwd_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp b/profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp rename to profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_elementwise_layernorm_impl.hpp b/profiler/include/profiler/profile_elementwise_layernorm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_elementwise_layernorm_impl.hpp rename to profiler/include/profiler/profile_elementwise_layernorm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp rename to profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp rename to profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_add_multiply_impl.hpp b/profiler/include/profiler/profile_gemm_add_multiply_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_add_multiply_impl.hpp rename to profiler/include/profiler/profile_gemm_add_multiply_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp b/profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp rename to profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp b/profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp rename to profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_bilinear_impl.hpp b/profiler/include/profiler/profile_gemm_bilinear_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_bilinear_impl.hpp rename to profiler/include/profiler/profile_gemm_bilinear_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_fastgelu_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_fastgelu_impl.hpp rename to profiler/include/profiler/profile_gemm_fastgelu_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_impl.hpp b/profiler/include/profiler/profile_gemm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_impl.hpp rename to profiler/include/profiler/profile_gemm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_multiply_add_impl.hpp b/profiler/include/profiler/profile_gemm_multiply_add_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_multiply_add_impl.hpp rename to profiler/include/profiler/profile_gemm_multiply_add_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_reduce_impl.hpp b/profiler/include/profiler/profile_gemm_reduce_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_reduce_impl.hpp rename to profiler/include/profiler/profile_gemm_reduce_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_splitk_impl.hpp b/profiler/include/profiler/profile_gemm_splitk_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_splitk_impl.hpp rename to profiler/include/profiler/profile_gemm_splitk_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_streamk_impl.hpp b/profiler/include/profiler/profile_gemm_streamk_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_streamk_impl.hpp rename to profiler/include/profiler/profile_gemm_streamk_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemv_splitk_impl.hpp b/profiler/include/profiler/profile_gemv_splitk_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemv_splitk_impl.hpp rename to profiler/include/profiler/profile_gemv_splitk_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp b/profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp rename to profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp b/profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp rename to profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp b/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp rename to profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp rename to profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_grouped_gemm_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_grouped_gemm_impl.hpp rename to profiler/include/profiler/profile_grouped_gemm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_groupnorm_impl.hpp b/profiler/include/profiler/profile_groupnorm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_groupnorm_impl.hpp rename to profiler/include/profiler/profile_groupnorm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_layernorm_impl.hpp b/profiler/include/profiler/profile_layernorm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_layernorm_impl.hpp rename to profiler/include/profiler/profile_layernorm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp b/profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp rename to profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_pool3d_fwd_impl.hpp b/profiler/include/profiler/profile_pool3d_fwd_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_pool3d_fwd_impl.hpp rename to profiler/include/profiler/profile_pool3d_fwd_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_reduce_impl.hpp b/profiler/include/profiler/profile_reduce_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_reduce_impl.hpp rename to profiler/include/profiler/profile_reduce_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_softmax_impl.hpp b/profiler/include/profiler/profile_softmax_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_softmax_impl.hpp rename to profiler/include/profiler/profile_softmax_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_tall_and_skinny_gemm_splitk_impl.hpp b/profiler/include/profiler/profile_tall_and_skinny_gemm_splitk_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_tall_and_skinny_gemm_splitk_impl.hpp rename to profiler/include/profiler/profile_tall_and_skinny_gemm_splitk_impl.hpp diff --git a/composable_kernel/profiler/src/CMakeLists.txt b/profiler/src/CMakeLists.txt similarity index 100% rename from composable_kernel/profiler/src/CMakeLists.txt rename to profiler/src/CMakeLists.txt diff --git a/composable_kernel/profiler/src/profile_avg_pool3d_bwd.cpp b/profiler/src/profile_avg_pool3d_bwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_avg_pool3d_bwd.cpp rename to profiler/src/profile_avg_pool3d_bwd.cpp diff --git a/composable_kernel/profiler/src/profile_batched_gemm.cpp b/profiler/src/profile_batched_gemm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batched_gemm.cpp rename to profiler/src/profile_batched_gemm.cpp diff --git a/composable_kernel/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp b/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp rename to profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp diff --git a/composable_kernel/profiler/src/profile_batched_gemm_gemm.cpp b/profiler/src/profile_batched_gemm_gemm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batched_gemm_gemm.cpp rename to profiler/src/profile_batched_gemm_gemm.cpp diff --git a/composable_kernel/profiler/src/profile_batched_gemm_multi_d.cpp b/profiler/src/profile_batched_gemm_multi_d.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batched_gemm_multi_d.cpp rename to profiler/src/profile_batched_gemm_multi_d.cpp diff --git a/composable_kernel/profiler/src/profile_batched_gemm_reduce.cpp b/profiler/src/profile_batched_gemm_reduce.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batched_gemm_reduce.cpp rename to profiler/src/profile_batched_gemm_reduce.cpp diff --git a/composable_kernel/profiler/src/profile_batchnorm_bwd.cpp b/profiler/src/profile_batchnorm_bwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batchnorm_bwd.cpp rename to profiler/src/profile_batchnorm_bwd.cpp diff --git a/composable_kernel/profiler/src/profile_batchnorm_fwd.cpp b/profiler/src/profile_batchnorm_fwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batchnorm_fwd.cpp rename to profiler/src/profile_batchnorm_fwd.cpp diff --git a/composable_kernel/profiler/src/profile_batchnorm_infer.cpp b/profiler/src/profile_batchnorm_infer.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batchnorm_infer.cpp rename to profiler/src/profile_batchnorm_infer.cpp diff --git a/composable_kernel/profiler/src/profile_contraction_bilinear.cpp b/profiler/src/profile_contraction_bilinear.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_contraction_bilinear.cpp rename to profiler/src/profile_contraction_bilinear.cpp diff --git a/composable_kernel/profiler/src/profile_contraction_scale.cpp b/profiler/src/profile_contraction_scale.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_contraction_scale.cpp rename to profiler/src/profile_contraction_scale.cpp diff --git a/composable_kernel/profiler/src/profile_conv_bwd_data.cpp b/profiler/src/profile_conv_bwd_data.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_conv_bwd_data.cpp rename to profiler/src/profile_conv_bwd_data.cpp diff --git a/composable_kernel/profiler/src/profile_conv_fwd.cpp b/profiler/src/profile_conv_fwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_conv_fwd.cpp rename to profiler/src/profile_conv_fwd.cpp diff --git a/composable_kernel/profiler/src/profile_conv_fwd_bias_relu.cpp b/profiler/src/profile_conv_fwd_bias_relu.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_conv_fwd_bias_relu.cpp rename to profiler/src/profile_conv_fwd_bias_relu.cpp diff --git a/composable_kernel/profiler/src/profile_conv_fwd_bias_relu_add.cpp b/profiler/src/profile_conv_fwd_bias_relu_add.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_conv_fwd_bias_relu_add.cpp rename to profiler/src/profile_conv_fwd_bias_relu_add.cpp diff --git a/composable_kernel/profiler/src/profile_conv_tensor_rearrange.cpp b/profiler/src/profile_conv_tensor_rearrange.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_conv_tensor_rearrange.cpp rename to profiler/src/profile_conv_tensor_rearrange.cpp diff --git a/composable_kernel/profiler/src/profile_gemm.cpp b/profiler/src/profile_gemm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm.cpp rename to profiler/src/profile_gemm.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_add_add_fastgelu.cpp b/profiler/src/profile_gemm_add_add_fastgelu.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_add_add_fastgelu.cpp rename to profiler/src/profile_gemm_add_add_fastgelu.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_add_fastgelu.cpp b/profiler/src/profile_gemm_add_fastgelu.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_add_fastgelu.cpp rename to profiler/src/profile_gemm_add_fastgelu.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_add_multiply.cpp b/profiler/src/profile_gemm_add_multiply.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_add_multiply.cpp rename to profiler/src/profile_gemm_add_multiply.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_add_relu_add_layernorm.cpp b/profiler/src/profile_gemm_add_relu_add_layernorm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_add_relu_add_layernorm.cpp rename to profiler/src/profile_gemm_add_relu_add_layernorm.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_bias_add_reduce.cpp b/profiler/src/profile_gemm_bias_add_reduce.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_bias_add_reduce.cpp rename to profiler/src/profile_gemm_bias_add_reduce.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_bilinear.cpp b/profiler/src/profile_gemm_bilinear.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_bilinear.cpp rename to profiler/src/profile_gemm_bilinear.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_fastgelu.cpp b/profiler/src/profile_gemm_fastgelu.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_fastgelu.cpp rename to profiler/src/profile_gemm_fastgelu.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_multiply_add.cpp b/profiler/src/profile_gemm_multiply_add.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_multiply_add.cpp rename to profiler/src/profile_gemm_multiply_add.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_reduce.cpp b/profiler/src/profile_gemm_reduce.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_reduce.cpp rename to profiler/src/profile_gemm_reduce.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_splitk.cpp b/profiler/src/profile_gemm_splitk.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_splitk.cpp rename to profiler/src/profile_gemm_splitk.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_streamk.cpp b/profiler/src/profile_gemm_streamk.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_streamk.cpp rename to profiler/src/profile_gemm_streamk.cpp diff --git a/composable_kernel/profiler/src/profile_gemv_splitk.cpp b/profiler/src/profile_gemv_splitk.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemv_splitk.cpp rename to profiler/src/profile_gemv_splitk.cpp diff --git a/composable_kernel/profiler/src/profile_grouped_conv_bwd_data.cpp b/profiler/src/profile_grouped_conv_bwd_data.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_grouped_conv_bwd_data.cpp rename to profiler/src/profile_grouped_conv_bwd_data.cpp diff --git a/composable_kernel/profiler/src/profile_grouped_conv_bwd_weight.cpp b/profiler/src/profile_grouped_conv_bwd_weight.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_grouped_conv_bwd_weight.cpp rename to profiler/src/profile_grouped_conv_bwd_weight.cpp diff --git a/composable_kernel/profiler/src/profile_grouped_conv_fwd.cpp b/profiler/src/profile_grouped_conv_fwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_grouped_conv_fwd.cpp rename to profiler/src/profile_grouped_conv_fwd.cpp diff --git a/composable_kernel/profiler/src/profile_grouped_gemm.cpp b/profiler/src/profile_grouped_gemm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_grouped_gemm.cpp rename to profiler/src/profile_grouped_gemm.cpp diff --git a/composable_kernel/profiler/src/profile_grouped_gemm_fastgelu.cpp b/profiler/src/profile_grouped_gemm_fastgelu.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_grouped_gemm_fastgelu.cpp rename to profiler/src/profile_grouped_gemm_fastgelu.cpp diff --git a/composable_kernel/profiler/src/profile_groupnorm.cpp b/profiler/src/profile_groupnorm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_groupnorm.cpp rename to profiler/src/profile_groupnorm.cpp diff --git a/composable_kernel/profiler/src/profile_layernorm.cpp b/profiler/src/profile_layernorm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_layernorm.cpp rename to profiler/src/profile_layernorm.cpp diff --git a/composable_kernel/profiler/src/profile_max_pool3d_bwd.cpp b/profiler/src/profile_max_pool3d_bwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_max_pool3d_bwd.cpp rename to profiler/src/profile_max_pool3d_bwd.cpp diff --git a/composable_kernel/profiler/src/profile_max_pool3d_fwd.cpp b/profiler/src/profile_max_pool3d_fwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_max_pool3d_fwd.cpp rename to profiler/src/profile_max_pool3d_fwd.cpp diff --git a/composable_kernel/profiler/src/profile_reduce.cpp b/profiler/src/profile_reduce.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_reduce.cpp rename to profiler/src/profile_reduce.cpp diff --git a/composable_kernel/profiler/src/profile_softmax.cpp b/profiler/src/profile_softmax.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_softmax.cpp rename to profiler/src/profile_softmax.cpp diff --git a/composable_kernel/profiler/src/profile_tall_and_skinny_gemm_splitk.cpp b/profiler/src/profile_tall_and_skinny_gemm_splitk.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_tall_and_skinny_gemm_splitk.cpp rename to profiler/src/profile_tall_and_skinny_gemm_splitk.cpp diff --git a/composable_kernel/profiler/src/profiler.cpp b/profiler/src/profiler.cpp similarity index 100% rename from composable_kernel/profiler/src/profiler.cpp rename to profiler/src/profiler.cpp diff --git a/composable_kernel/profiler/src/profiler_operation_registry.hpp b/profiler/src/profiler_operation_registry.hpp similarity index 100% rename from composable_kernel/profiler/src/profiler_operation_registry.hpp rename to profiler/src/profiler_operation_registry.hpp diff --git a/composable_kernel/rbuild.ini b/rbuild.ini similarity index 100% rename from composable_kernel/rbuild.ini rename to rbuild.ini diff --git a/composable_kernel/requirements.txt b/requirements.txt similarity index 100% rename from composable_kernel/requirements.txt rename to requirements.txt diff --git a/composable_kernel/script/check_copyright_year.sh b/script/check_copyright_year.sh similarity index 100% rename from composable_kernel/script/check_copyright_year.sh rename to script/check_copyright_year.sh diff --git a/composable_kernel/script/clang-format-overwrite.sh b/script/clang-format-overwrite.sh similarity index 100% rename from composable_kernel/script/clang-format-overwrite.sh rename to script/clang-format-overwrite.sh diff --git a/composable_kernel/script/cmake-ck-dev.sh b/script/cmake-ck-dev.sh similarity index 100% rename from composable_kernel/script/cmake-ck-dev.sh rename to script/cmake-ck-dev.sh diff --git a/composable_kernel/script/cmake-ck-release.sh b/script/cmake-ck-release.sh similarity index 100% rename from composable_kernel/script/cmake-ck-release.sh rename to script/cmake-ck-release.sh diff --git a/composable_kernel/script/count_vgpr.sh b/script/count_vgpr.sh similarity index 100% rename from composable_kernel/script/count_vgpr.sh rename to script/count_vgpr.sh diff --git a/composable_kernel/script/hipclang_opt.sh b/script/hipclang_opt.sh similarity index 100% rename from composable_kernel/script/hipclang_opt.sh rename to script/hipclang_opt.sh diff --git a/composable_kernel/script/install_precommit.sh b/script/install_precommit.sh similarity index 100% rename from composable_kernel/script/install_precommit.sh rename to script/install_precommit.sh diff --git a/composable_kernel/script/parse_perf_data.py b/script/parse_perf_data.py similarity index 100% rename from composable_kernel/script/parse_perf_data.py rename to script/parse_perf_data.py diff --git a/composable_kernel/script/process_perf_data.py b/script/process_perf_data.py similarity index 100% rename from composable_kernel/script/process_perf_data.py rename to script/process_perf_data.py diff --git a/composable_kernel/script/process_perf_data.sh b/script/process_perf_data.sh similarity index 100% rename from composable_kernel/script/process_perf_data.sh rename to script/process_perf_data.sh diff --git a/composable_kernel/script/process_qa_data.sh b/script/process_qa_data.sh similarity index 100% rename from composable_kernel/script/process_qa_data.sh rename to script/process_qa_data.sh diff --git a/composable_kernel/script/profile_batched_gemm.sh b/script/profile_batched_gemm.sh similarity index 100% rename from composable_kernel/script/profile_batched_gemm.sh rename to script/profile_batched_gemm.sh diff --git a/composable_kernel/script/profile_conv_bwd_data.sh b/script/profile_conv_bwd_data.sh similarity index 100% rename from composable_kernel/script/profile_conv_bwd_data.sh rename to script/profile_conv_bwd_data.sh diff --git a/composable_kernel/script/profile_conv_fwd.sh b/script/profile_conv_fwd.sh similarity index 100% rename from composable_kernel/script/profile_conv_fwd.sh rename to script/profile_conv_fwd.sh diff --git a/composable_kernel/script/profile_gemm.sh b/script/profile_gemm.sh similarity index 100% rename from composable_kernel/script/profile_gemm.sh rename to script/profile_gemm.sh diff --git a/composable_kernel/script/profile_gemm_bilinear.sh b/script/profile_gemm_bilinear.sh similarity index 100% rename from composable_kernel/script/profile_gemm_bilinear.sh rename to script/profile_gemm_bilinear.sh diff --git a/composable_kernel/script/profile_grouped_gemm.sh b/script/profile_grouped_gemm.sh similarity index 100% rename from composable_kernel/script/profile_grouped_gemm.sh rename to script/profile_grouped_gemm.sh diff --git a/composable_kernel/script/profile_onnx_gemm.sh b/script/profile_onnx_gemm.sh similarity index 100% rename from composable_kernel/script/profile_onnx_gemm.sh rename to script/profile_onnx_gemm.sh diff --git a/composable_kernel/script/profile_reduce_no_index.sh b/script/profile_reduce_no_index.sh similarity index 100% rename from composable_kernel/script/profile_reduce_no_index.sh rename to script/profile_reduce_no_index.sh diff --git a/composable_kernel/script/profile_reduce_with_index.sh b/script/profile_reduce_with_index.sh similarity index 100% rename from composable_kernel/script/profile_reduce_with_index.sh rename to script/profile_reduce_with_index.sh diff --git a/composable_kernel/script/profile_resnet50.sh b/script/profile_resnet50.sh similarity index 100% rename from composable_kernel/script/profile_resnet50.sh rename to script/profile_resnet50.sh diff --git a/composable_kernel/script/profile_splitK_gemm.sh b/script/profile_splitK_gemm.sh similarity index 100% rename from composable_kernel/script/profile_splitK_gemm.sh rename to script/profile_splitK_gemm.sh diff --git a/composable_kernel/script/run_full_performance_tests.sh b/script/run_full_performance_tests.sh similarity index 100% rename from composable_kernel/script/run_full_performance_tests.sh rename to script/run_full_performance_tests.sh diff --git a/composable_kernel/script/run_performance_tests.sh b/script/run_performance_tests.sh similarity index 100% rename from composable_kernel/script/run_performance_tests.sh rename to script/run_performance_tests.sh diff --git a/composable_kernel/script/test_convnd_fwd.sh b/script/test_convnd_fwd.sh similarity index 100% rename from composable_kernel/script/test_convnd_fwd.sh rename to script/test_convnd_fwd.sh diff --git a/composable_kernel/script/test_reduce_no_index.sh b/script/test_reduce_no_index.sh similarity index 100% rename from composable_kernel/script/test_reduce_no_index.sh rename to script/test_reduce_no_index.sh diff --git a/composable_kernel/script/test_reduce_with_index.sh b/script/test_reduce_with_index.sh similarity index 100% rename from composable_kernel/script/test_reduce_with_index.sh rename to script/test_reduce_with_index.sh diff --git a/composable_kernel/script/uninstall_precommit.sh b/script/uninstall_precommit.sh similarity index 100% rename from composable_kernel/script/uninstall_precommit.sh rename to script/uninstall_precommit.sh diff --git a/composable_kernel/test/CMakeLists.txt b/test/CMakeLists.txt similarity index 100% rename from composable_kernel/test/CMakeLists.txt rename to test/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm/CMakeLists.txt b/test/batched_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm/CMakeLists.txt rename to test/batched_gemm/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm/batched_gemm_bf16.cpp b/test/batched_gemm/batched_gemm_bf16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm/batched_gemm_bf16.cpp rename to test/batched_gemm/batched_gemm_bf16.cpp diff --git a/composable_kernel/test/batched_gemm/batched_gemm_fp16.cpp b/test/batched_gemm/batched_gemm_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm/batched_gemm_fp16.cpp rename to test/batched_gemm/batched_gemm_fp16.cpp diff --git a/composable_kernel/test/batched_gemm/batched_gemm_fp32.cpp b/test/batched_gemm/batched_gemm_fp32.cpp similarity index 100% rename from composable_kernel/test/batched_gemm/batched_gemm_fp32.cpp rename to test/batched_gemm/batched_gemm_fp32.cpp diff --git a/composable_kernel/test/batched_gemm/batched_gemm_int8.cpp b/test/batched_gemm/batched_gemm_int8.cpp similarity index 100% rename from composable_kernel/test/batched_gemm/batched_gemm_int8.cpp rename to test/batched_gemm/batched_gemm_int8.cpp diff --git a/composable_kernel/test/batched_gemm/test_batched_gemm.cpp b/test/batched_gemm/test_batched_gemm.cpp similarity index 100% rename from composable_kernel/test/batched_gemm/test_batched_gemm.cpp rename to test/batched_gemm/test_batched_gemm.cpp diff --git a/composable_kernel/test/batched_gemm_gemm/CMakeLists.txt b/test/batched_gemm_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm_gemm/CMakeLists.txt rename to test/batched_gemm_gemm/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp b/test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp rename to test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp diff --git a/composable_kernel/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp b/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp similarity index 100% rename from composable_kernel/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp rename to test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp diff --git a/composable_kernel/test/batched_gemm_multi_d/CMakeLists.txt b/test/batched_gemm_multi_d/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm_multi_d/CMakeLists.txt rename to test/batched_gemm_multi_d/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp b/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp rename to test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp diff --git a/composable_kernel/test/batched_gemm_reduce/CMakeLists.txt b/test/batched_gemm_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm_reduce/CMakeLists.txt rename to test/batched_gemm_reduce/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp b/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp rename to test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm/CMakeLists.txt b/test/batched_gemm_softmax_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm/CMakeLists.txt rename to test/batched_gemm_softmax_gemm/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp b/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp rename to test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp b/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp rename to test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt b/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt rename to test/batched_gemm_softmax_gemm_permute/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp diff --git a/composable_kernel/test/batchnorm/CMakeLists.txt b/test/batchnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batchnorm/CMakeLists.txt rename to test/batchnorm/CMakeLists.txt diff --git a/composable_kernel/test/batchnorm/batchnorm_bwd_rank_4.cpp b/test/batchnorm/batchnorm_bwd_rank_4.cpp similarity index 100% rename from composable_kernel/test/batchnorm/batchnorm_bwd_rank_4.cpp rename to test/batchnorm/batchnorm_bwd_rank_4.cpp diff --git a/composable_kernel/test/batchnorm/batchnorm_fwd_rank_4.cpp b/test/batchnorm/batchnorm_fwd_rank_4.cpp similarity index 100% rename from composable_kernel/test/batchnorm/batchnorm_fwd_rank_4.cpp rename to test/batchnorm/batchnorm_fwd_rank_4.cpp diff --git a/composable_kernel/test/batchnorm/batchnorm_infer_rank_4.cpp b/test/batchnorm/batchnorm_infer_rank_4.cpp similarity index 100% rename from composable_kernel/test/batchnorm/batchnorm_infer_rank_4.cpp rename to test/batchnorm/batchnorm_infer_rank_4.cpp diff --git a/composable_kernel/test/block_swizzle_test/block_swizzle_test.cpp b/test/block_swizzle_test/block_swizzle_test.cpp similarity index 100% rename from composable_kernel/test/block_swizzle_test/block_swizzle_test.cpp rename to test/block_swizzle_test/block_swizzle_test.cpp diff --git a/composable_kernel/test/block_swizzle_test/rebuild.sh b/test/block_swizzle_test/rebuild.sh similarity index 100% rename from composable_kernel/test/block_swizzle_test/rebuild.sh rename to test/block_swizzle_test/rebuild.sh diff --git a/composable_kernel/test/block_swizzle_test/simple_args.h b/test/block_swizzle_test/simple_args.h similarity index 100% rename from composable_kernel/test/block_swizzle_test/simple_args.h rename to test/block_swizzle_test/simple_args.h diff --git a/composable_kernel/test/block_to_ctile_map/CMakeLists.txt b/test/block_to_ctile_map/CMakeLists.txt similarity index 100% rename from composable_kernel/test/block_to_ctile_map/CMakeLists.txt rename to test/block_to_ctile_map/CMakeLists.txt diff --git a/composable_kernel/test/block_to_ctile_map/test_block_to_ctile_map.cpp b/test/block_to_ctile_map/test_block_to_ctile_map.cpp similarity index 100% rename from composable_kernel/test/block_to_ctile_map/test_block_to_ctile_map.cpp rename to test/block_to_ctile_map/test_block_to_ctile_map.cpp diff --git a/composable_kernel/test/contraction/CMakeLists.txt b/test/contraction/CMakeLists.txt similarity index 100% rename from composable_kernel/test/contraction/CMakeLists.txt rename to test/contraction/CMakeLists.txt diff --git a/composable_kernel/test/contraction/test_contraction.cpp b/test/contraction/test_contraction.cpp similarity index 100% rename from composable_kernel/test/contraction/test_contraction.cpp rename to test/contraction/test_contraction.cpp diff --git a/composable_kernel/test/contraction/test_contraction_interface.cpp b/test/contraction/test_contraction_interface.cpp similarity index 100% rename from composable_kernel/test/contraction/test_contraction_interface.cpp rename to test/contraction/test_contraction_interface.cpp diff --git a/composable_kernel/test/conv_tensor_rearrange/CMakeLists.txt b/test/conv_tensor_rearrange/CMakeLists.txt similarity index 100% rename from composable_kernel/test/conv_tensor_rearrange/CMakeLists.txt rename to test/conv_tensor_rearrange/CMakeLists.txt diff --git a/composable_kernel/test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp b/test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp similarity index 100% rename from composable_kernel/test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp rename to test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp diff --git a/composable_kernel/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp b/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp similarity index 100% rename from composable_kernel/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp rename to test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp diff --git a/composable_kernel/test/conv_util/CMakeLists.txt b/test/conv_util/CMakeLists.txt similarity index 100% rename from composable_kernel/test/conv_util/CMakeLists.txt rename to test/conv_util/CMakeLists.txt diff --git a/composable_kernel/test/conv_util/conv_util.cpp b/test/conv_util/conv_util.cpp similarity index 100% rename from composable_kernel/test/conv_util/conv_util.cpp rename to test/conv_util/conv_util.cpp diff --git a/composable_kernel/test/convnd_bwd_data/CMakeLists.txt b/test/convnd_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/test/convnd_bwd_data/CMakeLists.txt rename to test/convnd_bwd_data/CMakeLists.txt diff --git a/composable_kernel/test/convnd_bwd_data/convnd_bwd_data.cpp b/test/convnd_bwd_data/convnd_bwd_data.cpp similarity index 100% rename from composable_kernel/test/convnd_bwd_data/convnd_bwd_data.cpp rename to test/convnd_bwd_data/convnd_bwd_data.cpp diff --git a/composable_kernel/test/convnd_fwd/CMakeLists.txt b/test/convnd_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/test/convnd_fwd/CMakeLists.txt rename to test/convnd_fwd/CMakeLists.txt diff --git a/composable_kernel/test/convnd_fwd/convnd_fwd.cpp b/test/convnd_fwd/convnd_fwd.cpp similarity index 100% rename from composable_kernel/test/convnd_fwd/convnd_fwd.cpp rename to test/convnd_fwd/convnd_fwd.cpp diff --git a/composable_kernel/test/data_type/CMakeLists.txt b/test/data_type/CMakeLists.txt similarity index 100% rename from composable_kernel/test/data_type/CMakeLists.txt rename to test/data_type/CMakeLists.txt diff --git a/composable_kernel/test/data_type/test_bf8.cpp b/test/data_type/test_bf8.cpp similarity index 100% rename from composable_kernel/test/data_type/test_bf8.cpp rename to test/data_type/test_bf8.cpp diff --git a/composable_kernel/test/data_type/test_fp8.cpp b/test/data_type/test_fp8.cpp similarity index 100% rename from composable_kernel/test/data_type/test_fp8.cpp rename to test/data_type/test_fp8.cpp diff --git a/composable_kernel/test/data_type/test_int4.cpp b/test/data_type/test_int4.cpp similarity index 100% rename from composable_kernel/test/data_type/test_int4.cpp rename to test/data_type/test_int4.cpp diff --git a/composable_kernel/test/data_type/type_convert_const.cpp b/test/data_type/type_convert_const.cpp similarity index 100% rename from composable_kernel/test/data_type/type_convert_const.cpp rename to test/data_type/type_convert_const.cpp diff --git a/composable_kernel/test/elementwise_normalization/CMakeLists.txt b/test/elementwise_normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/test/elementwise_normalization/CMakeLists.txt rename to test/elementwise_normalization/CMakeLists.txt diff --git a/composable_kernel/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp b/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp similarity index 100% rename from composable_kernel/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp rename to test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp diff --git a/composable_kernel/test/gemm/CMakeLists.txt b/test/gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/gemm/CMakeLists.txt rename to test/gemm/CMakeLists.txt diff --git a/composable_kernel/test/gemm/gemm_bf16.cpp b/test/gemm/gemm_bf16.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_bf16.cpp rename to test/gemm/gemm_bf16.cpp diff --git a/composable_kernel/test/gemm/gemm_fp16.cpp b/test/gemm/gemm_fp16.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_fp16.cpp rename to test/gemm/gemm_fp16.cpp diff --git a/composable_kernel/test/gemm/gemm_fp32.cpp b/test/gemm/gemm_fp32.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_fp32.cpp rename to test/gemm/gemm_fp32.cpp diff --git a/composable_kernel/test/gemm/gemm_fp64.cpp b/test/gemm/gemm_fp64.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_fp64.cpp rename to test/gemm/gemm_fp64.cpp diff --git a/composable_kernel/test/gemm/gemm_int8.cpp b/test/gemm/gemm_int8.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_int8.cpp rename to test/gemm/gemm_int8.cpp diff --git a/composable_kernel/test/gemm/gemm_standalone_xdl_fp16.cpp b/test/gemm/gemm_standalone_xdl_fp16.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_standalone_xdl_fp16.cpp rename to test/gemm/gemm_standalone_xdl_fp16.cpp diff --git a/composable_kernel/test/gemm/gemm_util.hpp b/test/gemm/gemm_util.hpp similarity index 100% rename from composable_kernel/test/gemm/gemm_util.hpp rename to test/gemm/gemm_util.hpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_nn_instance.cpp b/test/gemm/instance/gemm_f16_nn_instance.cpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_nn_instance.cpp rename to test/gemm/instance/gemm_f16_nn_instance.cpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_nn_instance.hpp b/test/gemm/instance/gemm_f16_nn_instance.hpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_nn_instance.hpp rename to test/gemm/instance/gemm_f16_nn_instance.hpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_nt_instance.cpp b/test/gemm/instance/gemm_f16_nt_instance.cpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_nt_instance.cpp rename to test/gemm/instance/gemm_f16_nt_instance.cpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_nt_instance.hpp b/test/gemm/instance/gemm_f16_nt_instance.hpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_nt_instance.hpp rename to test/gemm/instance/gemm_f16_nt_instance.hpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_tn_instance.cpp b/test/gemm/instance/gemm_f16_tn_instance.cpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_tn_instance.cpp rename to test/gemm/instance/gemm_f16_tn_instance.cpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_tn_instance.hpp b/test/gemm/instance/gemm_f16_tn_instance.hpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_tn_instance.hpp rename to test/gemm/instance/gemm_f16_tn_instance.hpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_tt_instance.cpp b/test/gemm/instance/gemm_f16_tt_instance.cpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_tt_instance.cpp rename to test/gemm/instance/gemm_f16_tt_instance.cpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_tt_instance.hpp b/test/gemm/instance/gemm_f16_tt_instance.hpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_tt_instance.hpp rename to test/gemm/instance/gemm_f16_tt_instance.hpp diff --git a/composable_kernel/test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp b/test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp rename to test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp diff --git a/composable_kernel/test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp b/test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp rename to test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp diff --git a/composable_kernel/test/gemm/run_gemm_test.inc b/test/gemm/run_gemm_test.inc similarity index 100% rename from composable_kernel/test/gemm/run_gemm_test.inc rename to test/gemm/run_gemm_test.inc diff --git a/composable_kernel/test/gemm_layernorm/CMakeLists.txt b/test/gemm_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/gemm_layernorm/CMakeLists.txt rename to test/gemm_layernorm/CMakeLists.txt diff --git a/composable_kernel/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp b/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp similarity index 100% rename from composable_kernel/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp rename to test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp diff --git a/composable_kernel/test/gemm_reduce/CMakeLists.txt b/test/gemm_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/test/gemm_reduce/CMakeLists.txt rename to test/gemm_reduce/CMakeLists.txt diff --git a/composable_kernel/test/gemm_reduce/gemm_reduce_fp16.cpp b/test/gemm_reduce/gemm_reduce_fp16.cpp similarity index 100% rename from composable_kernel/test/gemm_reduce/gemm_reduce_fp16.cpp rename to test/gemm_reduce/gemm_reduce_fp16.cpp diff --git a/composable_kernel/test/gemm_split_k/CMakeLists.txt b/test/gemm_split_k/CMakeLists.txt similarity index 100% rename from composable_kernel/test/gemm_split_k/CMakeLists.txt rename to test/gemm_split_k/CMakeLists.txt diff --git a/composable_kernel/test/gemm_split_k/test_gemm_splitk.cpp b/test/gemm_split_k/test_gemm_splitk.cpp similarity index 100% rename from composable_kernel/test/gemm_split_k/test_gemm_splitk.cpp rename to test/gemm_split_k/test_gemm_splitk.cpp diff --git a/composable_kernel/test/gemm_split_k/test_gemm_splitk_ut_cases.inc b/test/gemm_split_k/test_gemm_splitk_ut_cases.inc similarity index 100% rename from composable_kernel/test/gemm_split_k/test_gemm_splitk_ut_cases.inc rename to test/gemm_split_k/test_gemm_splitk_ut_cases.inc diff --git a/composable_kernel/test/gemm_split_k/test_gemm_splitk_util.hpp b/test/gemm_split_k/test_gemm_splitk_util.hpp similarity index 100% rename from composable_kernel/test/gemm_split_k/test_gemm_splitk_util.hpp rename to test/gemm_split_k/test_gemm_splitk_util.hpp diff --git a/composable_kernel/test/grouped_convnd_bwd_data/CMakeLists.txt b/test/grouped_convnd_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_data/CMakeLists.txt rename to test/grouped_convnd_bwd_data/CMakeLists.txt diff --git a/composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp rename to test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp diff --git a/composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp rename to test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp diff --git a/composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp rename to test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp diff --git a/composable_kernel/test/grouped_convnd_bwd_weight/CMakeLists.txt b/test/grouped_convnd_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_weight/CMakeLists.txt rename to test/grouped_convnd_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp rename to test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp diff --git a/composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp rename to test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp diff --git a/composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp rename to test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp diff --git a/composable_kernel/test/grouped_convnd_fwd/CMakeLists.txt b/test/grouped_convnd_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/test/grouped_convnd_fwd/CMakeLists.txt rename to test/grouped_convnd_fwd/CMakeLists.txt diff --git a/composable_kernel/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp b/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp rename to test/grouped_convnd_fwd/grouped_convnd_fwd.cpp diff --git a/composable_kernel/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp rename to test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp diff --git a/composable_kernel/test/grouped_gemm/CMakeLists.txt b/test/grouped_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/grouped_gemm/CMakeLists.txt rename to test/grouped_gemm/CMakeLists.txt diff --git a/composable_kernel/test/grouped_gemm/test_grouped_gemm_interface.cpp b/test/grouped_gemm/test_grouped_gemm_interface.cpp similarity index 100% rename from composable_kernel/test/grouped_gemm/test_grouped_gemm_interface.cpp rename to test/grouped_gemm/test_grouped_gemm_interface.cpp diff --git a/composable_kernel/test/grouped_gemm/test_grouped_gemm_splitk.cpp b/test/grouped_gemm/test_grouped_gemm_splitk.cpp similarity index 100% rename from composable_kernel/test/grouped_gemm/test_grouped_gemm_splitk.cpp rename to test/grouped_gemm/test_grouped_gemm_splitk.cpp diff --git a/composable_kernel/test/grouped_gemm/test_grouped_gemm_ut_cases.inc b/test/grouped_gemm/test_grouped_gemm_ut_cases.inc similarity index 100% rename from composable_kernel/test/grouped_gemm/test_grouped_gemm_ut_cases.inc rename to test/grouped_gemm/test_grouped_gemm_ut_cases.inc diff --git a/composable_kernel/test/grouped_gemm/test_grouped_gemm_util.hpp b/test/grouped_gemm/test_grouped_gemm_util.hpp similarity index 100% rename from composable_kernel/test/grouped_gemm/test_grouped_gemm_util.hpp rename to test/grouped_gemm/test_grouped_gemm_util.hpp diff --git a/composable_kernel/test/image_to_column/CMakeLists.txt b/test/image_to_column/CMakeLists.txt similarity index 100% rename from composable_kernel/test/image_to_column/CMakeLists.txt rename to test/image_to_column/CMakeLists.txt diff --git a/composable_kernel/test/image_to_column/test_image_to_column.cpp b/test/image_to_column/test_image_to_column.cpp similarity index 100% rename from composable_kernel/test/image_to_column/test_image_to_column.cpp rename to test/image_to_column/test_image_to_column.cpp diff --git a/composable_kernel/test/image_to_column/test_image_to_column_interface.cpp b/test/image_to_column/test_image_to_column_interface.cpp similarity index 100% rename from composable_kernel/test/image_to_column/test_image_to_column_interface.cpp rename to test/image_to_column/test_image_to_column_interface.cpp diff --git a/composable_kernel/test/magic_number_division/CMakeLists.txt b/test/magic_number_division/CMakeLists.txt similarity index 100% rename from composable_kernel/test/magic_number_division/CMakeLists.txt rename to test/magic_number_division/CMakeLists.txt diff --git a/composable_kernel/test/magic_number_division/magic_number_division.cpp b/test/magic_number_division/magic_number_division.cpp similarity index 100% rename from composable_kernel/test/magic_number_division/magic_number_division.cpp rename to test/magic_number_division/magic_number_division.cpp diff --git a/composable_kernel/test/normalization/CMakeLists.txt b/test/normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/test/normalization/CMakeLists.txt rename to test/normalization/CMakeLists.txt diff --git a/composable_kernel/test/normalization/test_groupnorm_fp16.cpp b/test/normalization/test_groupnorm_fp16.cpp similarity index 100% rename from composable_kernel/test/normalization/test_groupnorm_fp16.cpp rename to test/normalization/test_groupnorm_fp16.cpp diff --git a/composable_kernel/test/normalization/test_groupnorm_fp32.cpp b/test/normalization/test_groupnorm_fp32.cpp similarity index 100% rename from composable_kernel/test/normalization/test_groupnorm_fp32.cpp rename to test/normalization/test_groupnorm_fp32.cpp diff --git a/composable_kernel/test/normalization/test_layernorm2d_fp16.cpp b/test/normalization/test_layernorm2d_fp16.cpp similarity index 100% rename from composable_kernel/test/normalization/test_layernorm2d_fp16.cpp rename to test/normalization/test_layernorm2d_fp16.cpp diff --git a/composable_kernel/test/normalization/test_layernorm2d_fp32.cpp b/test/normalization/test_layernorm2d_fp32.cpp similarity index 100% rename from composable_kernel/test/normalization/test_layernorm2d_fp32.cpp rename to test/normalization/test_layernorm2d_fp32.cpp diff --git a/composable_kernel/test/pool/CMakeLists.txt b/test/pool/CMakeLists.txt similarity index 100% rename from composable_kernel/test/pool/CMakeLists.txt rename to test/pool/CMakeLists.txt diff --git a/composable_kernel/test/pool/test_avg_pool3d_bwd.cpp b/test/pool/test_avg_pool3d_bwd.cpp similarity index 100% rename from composable_kernel/test/pool/test_avg_pool3d_bwd.cpp rename to test/pool/test_avg_pool3d_bwd.cpp diff --git a/composable_kernel/test/pool/test_avg_pool3d_fwd.cpp b/test/pool/test_avg_pool3d_fwd.cpp similarity index 100% rename from composable_kernel/test/pool/test_avg_pool3d_fwd.cpp rename to test/pool/test_avg_pool3d_fwd.cpp diff --git a/composable_kernel/test/pool/test_max_pool3d_bwd.cpp b/test/pool/test_max_pool3d_bwd.cpp similarity index 100% rename from composable_kernel/test/pool/test_max_pool3d_bwd.cpp rename to test/pool/test_max_pool3d_bwd.cpp diff --git a/composable_kernel/test/pool/test_max_pool3d_fwd.cpp b/test/pool/test_max_pool3d_fwd.cpp similarity index 100% rename from composable_kernel/test/pool/test_max_pool3d_fwd.cpp rename to test/pool/test_max_pool3d_fwd.cpp diff --git a/composable_kernel/test/pool/test_pool_fwd_common.hpp b/test/pool/test_pool_fwd_common.hpp similarity index 100% rename from composable_kernel/test/pool/test_pool_fwd_common.hpp rename to test/pool/test_pool_fwd_common.hpp diff --git a/composable_kernel/test/reduce/CMakeLists.txt b/test/reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/test/reduce/CMakeLists.txt rename to test/reduce/CMakeLists.txt diff --git a/composable_kernel/test/reduce/reduce_no_index.cpp b/test/reduce/reduce_no_index.cpp similarity index 100% rename from composable_kernel/test/reduce/reduce_no_index.cpp rename to test/reduce/reduce_no_index.cpp diff --git a/composable_kernel/test/reduce/reduce_with_index.cpp b/test/reduce/reduce_with_index.cpp similarity index 100% rename from composable_kernel/test/reduce/reduce_with_index.cpp rename to test/reduce/reduce_with_index.cpp diff --git a/composable_kernel/test/reference_conv_fwd/CMakeLists.txt b/test/reference_conv_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/test/reference_conv_fwd/CMakeLists.txt rename to test/reference_conv_fwd/CMakeLists.txt diff --git a/composable_kernel/test/reference_conv_fwd/reference_conv_fwd.cpp b/test/reference_conv_fwd/reference_conv_fwd.cpp similarity index 100% rename from composable_kernel/test/reference_conv_fwd/reference_conv_fwd.cpp rename to test/reference_conv_fwd/reference_conv_fwd.cpp diff --git a/composable_kernel/test/softmax/CMakeLists.txt b/test/softmax/CMakeLists.txt similarity index 100% rename from composable_kernel/test/softmax/CMakeLists.txt rename to test/softmax/CMakeLists.txt diff --git a/composable_kernel/test/softmax/test_softmax_interface.cpp b/test/softmax/test_softmax_interface.cpp similarity index 100% rename from composable_kernel/test/softmax/test_softmax_interface.cpp rename to test/softmax/test_softmax_interface.cpp diff --git a/composable_kernel/test/softmax/test_softmax_rank3.cpp b/test/softmax/test_softmax_rank3.cpp similarity index 100% rename from composable_kernel/test/softmax/test_softmax_rank3.cpp rename to test/softmax/test_softmax_rank3.cpp diff --git a/composable_kernel/test/softmax/test_softmax_rank4.cpp b/test/softmax/test_softmax_rank4.cpp similarity index 100% rename from composable_kernel/test/softmax/test_softmax_rank4.cpp rename to test/softmax/test_softmax_rank4.cpp diff --git a/composable_kernel/test/softmax/test_softmax_ut_cases.inc b/test/softmax/test_softmax_ut_cases.inc similarity index 100% rename from composable_kernel/test/softmax/test_softmax_ut_cases.inc rename to test/softmax/test_softmax_ut_cases.inc diff --git a/composable_kernel/test/softmax/test_softmax_util.hpp b/test/softmax/test_softmax_util.hpp similarity index 100% rename from composable_kernel/test/softmax/test_softmax_util.hpp rename to test/softmax/test_softmax_util.hpp diff --git a/composable_kernel/test/space_filling_curve/CMakeLists.txt b/test/space_filling_curve/CMakeLists.txt similarity index 100% rename from composable_kernel/test/space_filling_curve/CMakeLists.txt rename to test/space_filling_curve/CMakeLists.txt diff --git a/composable_kernel/test/space_filling_curve/space_filling_curve.cpp b/test/space_filling_curve/space_filling_curve.cpp similarity index 100% rename from composable_kernel/test/space_filling_curve/space_filling_curve.cpp rename to test/space_filling_curve/space_filling_curve.cpp diff --git a/composable_kernel/test/wmma_op/CMakeLists.txt b/test/wmma_op/CMakeLists.txt similarity index 100% rename from composable_kernel/test/wmma_op/CMakeLists.txt rename to test/wmma_op/CMakeLists.txt diff --git a/composable_kernel/test/wmma_op/wmma_op.cpp b/test/wmma_op/wmma_op.cpp similarity index 100% rename from composable_kernel/test/wmma_op/wmma_op.cpp rename to test/wmma_op/wmma_op.cpp diff --git a/composable_kernel/test/wmma_op/wmma_op_util.hpp b/test/wmma_op/wmma_op_util.hpp similarity index 100% rename from composable_kernel/test/wmma_op/wmma_op_util.hpp rename to test/wmma_op/wmma_op_util.hpp -- GitLab