diff --git a/composable_kernel/CHANGELOG.md b/CHANGELOG.md similarity index 100% rename from composable_kernel/CHANGELOG.md rename to CHANGELOG.md diff --git a/composable_kernel/CITATION.cff b/CITATION.cff similarity index 100% rename from composable_kernel/CITATION.cff rename to CITATION.cff diff --git a/composable_kernel/CMakeLists.txt b/CMakeLists.txt similarity index 100% rename from composable_kernel/CMakeLists.txt rename to CMakeLists.txt diff --git a/composable_kernel/CONTRIBUTORS.md b/CONTRIBUTORS.md similarity index 100% rename from composable_kernel/CONTRIBUTORS.md rename to CONTRIBUTORS.md diff --git a/composable_kernel/Config.cmake.in b/Config.cmake.in similarity index 100% rename from composable_kernel/Config.cmake.in rename to Config.cmake.in diff --git a/composable_kernel/Dockerfile b/Dockerfile similarity index 100% rename from composable_kernel/Dockerfile rename to Dockerfile diff --git a/composable_kernel/Jenkinsfile b/Jenkinsfile similarity index 100% rename from composable_kernel/Jenkinsfile rename to Jenkinsfile diff --git a/composable_kernel/LICENSE b/LICENSE similarity index 100% rename from composable_kernel/LICENSE rename to LICENSE diff --git a/composable_kernel/README.md b/README.md similarity index 100% rename from composable_kernel/README.md rename to README.md diff --git a/composable_kernel/client_example/01_gemm/CMakeLists.txt b/client_example/01_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/01_gemm/CMakeLists.txt rename to client_example/01_gemm/CMakeLists.txt diff --git a/composable_kernel/client_example/01_gemm/gemm.cpp b/client_example/01_gemm/gemm.cpp similarity index 100% rename from composable_kernel/client_example/01_gemm/gemm.cpp rename to client_example/01_gemm/gemm.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt b/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt rename to client_example/02_gemm_add_add_fastgelu/CMakeLists.txt diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp diff --git a/composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp similarity index 100% rename from composable_kernel/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp rename to client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp diff --git a/composable_kernel/client_example/03_gemm_layernorm/CMakeLists.txt b/client_example/03_gemm_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/03_gemm_layernorm/CMakeLists.txt rename to client_example/03_gemm_layernorm/CMakeLists.txt diff --git a/composable_kernel/client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp b/client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp similarity index 100% rename from composable_kernel/client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp rename to client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp diff --git a/composable_kernel/client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp b/client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp similarity index 100% rename from composable_kernel/client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp rename to client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp diff --git a/composable_kernel/client_example/04_contraction/CMakeLists.txt b/client_example/04_contraction/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/04_contraction/CMakeLists.txt rename to client_example/04_contraction/CMakeLists.txt diff --git a/composable_kernel/client_example/04_contraction/contraction_bilinear_fp32.cpp b/client_example/04_contraction/contraction_bilinear_fp32.cpp similarity index 100% rename from composable_kernel/client_example/04_contraction/contraction_bilinear_fp32.cpp rename to client_example/04_contraction/contraction_bilinear_fp32.cpp diff --git a/composable_kernel/client_example/04_contraction/contraction_bilinear_fp64.cpp b/client_example/04_contraction/contraction_bilinear_fp64.cpp similarity index 100% rename from composable_kernel/client_example/04_contraction/contraction_bilinear_fp64.cpp rename to client_example/04_contraction/contraction_bilinear_fp64.cpp diff --git a/composable_kernel/client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp b/client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp similarity index 100% rename from composable_kernel/client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp rename to client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp diff --git a/composable_kernel/client_example/04_contraction/contraction_scale_fp32.cpp b/client_example/04_contraction/contraction_scale_fp32.cpp similarity index 100% rename from composable_kernel/client_example/04_contraction/contraction_scale_fp32.cpp rename to client_example/04_contraction/contraction_scale_fp32.cpp diff --git a/composable_kernel/client_example/04_contraction/contraction_scale_fp64.cpp b/client_example/04_contraction/contraction_scale_fp64.cpp similarity index 100% rename from composable_kernel/client_example/04_contraction/contraction_scale_fp64.cpp rename to client_example/04_contraction/contraction_scale_fp64.cpp diff --git a/composable_kernel/client_example/05_layernorm/CMakeLists.txt b/client_example/05_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/05_layernorm/CMakeLists.txt rename to client_example/05_layernorm/CMakeLists.txt diff --git a/composable_kernel/client_example/05_layernorm/layernorm2d.cpp b/client_example/05_layernorm/layernorm2d.cpp similarity index 100% rename from composable_kernel/client_example/05_layernorm/layernorm2d.cpp rename to client_example/05_layernorm/layernorm2d.cpp diff --git a/composable_kernel/client_example/06_softmax/CMakeLists.txt b/client_example/06_softmax/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/06_softmax/CMakeLists.txt rename to client_example/06_softmax/CMakeLists.txt diff --git a/composable_kernel/client_example/06_softmax/softmax4d.cpp b/client_example/06_softmax/softmax4d.cpp similarity index 100% rename from composable_kernel/client_example/06_softmax/softmax4d.cpp rename to client_example/06_softmax/softmax4d.cpp diff --git a/composable_kernel/client_example/07_grouped_convnd_fwd/CMakeLists.txt b/client_example/07_grouped_convnd_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/07_grouped_convnd_fwd/CMakeLists.txt rename to client_example/07_grouped_convnd_fwd/CMakeLists.txt diff --git a/composable_kernel/client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp b/client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp similarity index 100% rename from composable_kernel/client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp rename to client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp diff --git a/composable_kernel/client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp b/client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp similarity index 100% rename from composable_kernel/client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp rename to client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp diff --git a/composable_kernel/client_example/08_fused_attention/CMakeLists.txt b/client_example/08_fused_attention/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/08_fused_attention/CMakeLists.txt rename to client_example/08_fused_attention/CMakeLists.txt diff --git a/composable_kernel/client_example/08_fused_attention/fused_attention.cpp b/client_example/08_fused_attention/fused_attention.cpp similarity index 100% rename from composable_kernel/client_example/08_fused_attention/fused_attention.cpp rename to client_example/08_fused_attention/fused_attention.cpp diff --git a/composable_kernel/client_example/08_fused_attention/fused_attention_bias.cpp b/client_example/08_fused_attention/fused_attention_bias.cpp similarity index 100% rename from composable_kernel/client_example/08_fused_attention/fused_attention_bias.cpp rename to client_example/08_fused_attention/fused_attention_bias.cpp diff --git a/composable_kernel/client_example/09_quantization/CMakeLists.txt b/client_example/09_quantization/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/09_quantization/CMakeLists.txt rename to client_example/09_quantization/CMakeLists.txt diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp rename to client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp diff --git a/composable_kernel/client_example/09_quantization/gemm_quantization.cpp b/client_example/09_quantization/gemm_quantization.cpp similarity index 100% rename from composable_kernel/client_example/09_quantization/gemm_quantization.cpp rename to client_example/09_quantization/gemm_quantization.cpp diff --git a/composable_kernel/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt b/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt rename to client_example/10_grouped_convnd_bwd_data/CMakeLists.txt diff --git a/composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp similarity index 100% rename from composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp rename to client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp diff --git a/composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp similarity index 100% rename from composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp rename to client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp diff --git a/composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp similarity index 100% rename from composable_kernel/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp rename to client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt b/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt rename to client_example/11_grouped_conv_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/common.hpp b/client_example/11_grouped_conv_bwd_weight/common.hpp similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/common.hpp rename to client_example/11_grouped_conv_bwd_weight/common.hpp diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp rename to client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp rename to client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp rename to client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp diff --git a/composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp similarity index 100% rename from composable_kernel/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp rename to client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp diff --git a/composable_kernel/client_example/12_elementwise_normalization/CMakeLists.txt b/client_example/12_elementwise_normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/12_elementwise_normalization/CMakeLists.txt rename to client_example/12_elementwise_normalization/CMakeLists.txt diff --git a/composable_kernel/client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp b/client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp similarity index 100% rename from composable_kernel/client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp rename to client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp diff --git a/composable_kernel/client_example/13_batchnorm/CMakeLists.txt b/client_example/13_batchnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/13_batchnorm/CMakeLists.txt rename to client_example/13_batchnorm/CMakeLists.txt diff --git a/composable_kernel/client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp b/client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp similarity index 100% rename from composable_kernel/client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp rename to client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp diff --git a/composable_kernel/client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp b/client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp similarity index 100% rename from composable_kernel/client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp rename to client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp diff --git a/composable_kernel/client_example/13_batchnorm/batchnorm_infer_nhwc.cpp b/client_example/13_batchnorm/batchnorm_infer_nhwc.cpp similarity index 100% rename from composable_kernel/client_example/13_batchnorm/batchnorm_infer_nhwc.cpp rename to client_example/13_batchnorm/batchnorm_infer_nhwc.cpp diff --git a/composable_kernel/client_example/14_instance_id/CMakeLists.txt b/client_example/14_instance_id/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/14_instance_id/CMakeLists.txt rename to client_example/14_instance_id/CMakeLists.txt diff --git a/composable_kernel/client_example/14_instance_id/batchnorm_fwd_instance_id.cpp b/client_example/14_instance_id/batchnorm_fwd_instance_id.cpp similarity index 100% rename from composable_kernel/client_example/14_instance_id/batchnorm_fwd_instance_id.cpp rename to client_example/14_instance_id/batchnorm_fwd_instance_id.cpp diff --git a/composable_kernel/client_example/15_convnd_bwd_data/CMakeLists.txt b/client_example/15_convnd_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/15_convnd_bwd_data/CMakeLists.txt rename to client_example/15_convnd_bwd_data/CMakeLists.txt diff --git a/composable_kernel/client_example/15_convnd_bwd_data/common.hpp b/client_example/15_convnd_bwd_data/common.hpp similarity index 100% rename from composable_kernel/client_example/15_convnd_bwd_data/common.hpp rename to client_example/15_convnd_bwd_data/common.hpp diff --git a/composable_kernel/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp b/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp similarity index 100% rename from composable_kernel/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp rename to client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp diff --git a/composable_kernel/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp b/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp similarity index 100% rename from composable_kernel/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp rename to client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp diff --git a/composable_kernel/client_example/15_gemm_add_multiply/CMakeLists.txt b/client_example/15_gemm_add_multiply/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/15_gemm_add_multiply/CMakeLists.txt rename to client_example/15_gemm_add_multiply/CMakeLists.txt diff --git a/composable_kernel/client_example/15_gemm_add_multiply/gemm_add_multiply.cpp b/client_example/15_gemm_add_multiply/gemm_add_multiply.cpp similarity index 100% rename from composable_kernel/client_example/15_gemm_add_multiply/gemm_add_multiply.cpp rename to client_example/15_gemm_add_multiply/gemm_add_multiply.cpp diff --git a/composable_kernel/client_example/15_reduce/CMakeLists.txt b/client_example/15_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/15_reduce/CMakeLists.txt rename to client_example/15_reduce/CMakeLists.txt diff --git a/composable_kernel/client_example/15_reduce/reduce_nhwc_c.cpp b/client_example/15_reduce/reduce_nhwc_c.cpp similarity index 100% rename from composable_kernel/client_example/15_reduce/reduce_nhwc_c.cpp rename to client_example/15_reduce/reduce_nhwc_c.cpp diff --git a/composable_kernel/client_example/16_convnd_fwd/CMakeLists.txt b/client_example/16_convnd_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/16_convnd_fwd/CMakeLists.txt rename to client_example/16_convnd_fwd/CMakeLists.txt diff --git a/composable_kernel/client_example/16_convnd_fwd/common.hpp b/client_example/16_convnd_fwd/common.hpp similarity index 100% rename from composable_kernel/client_example/16_convnd_fwd/common.hpp rename to client_example/16_convnd_fwd/common.hpp diff --git a/composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp similarity index 100% rename from composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp rename to client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp diff --git a/composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp similarity index 100% rename from composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp rename to client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp diff --git a/composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp similarity index 100% rename from composable_kernel/client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp rename to client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp diff --git a/composable_kernel/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt b/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt rename to client_example/17_grouped_gemm_fastgelu/CMakeLists.txt diff --git a/composable_kernel/client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp b/client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp similarity index 100% rename from composable_kernel/client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp rename to client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp diff --git a/composable_kernel/client_example/18_groupnorm/CMakeLists.txt b/client_example/18_groupnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/18_groupnorm/CMakeLists.txt rename to client_example/18_groupnorm/CMakeLists.txt diff --git a/composable_kernel/client_example/18_groupnorm/groupnorm_swish.cpp b/client_example/18_groupnorm/groupnorm_swish.cpp similarity index 100% rename from composable_kernel/client_example/18_groupnorm/groupnorm_swish.cpp rename to client_example/18_groupnorm/groupnorm_swish.cpp diff --git a/composable_kernel/client_example/19_pool/CMakeLists.txt b/client_example/19_pool/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/19_pool/CMakeLists.txt rename to client_example/19_pool/CMakeLists.txt diff --git a/composable_kernel/client_example/19_pool/avg_pool3d_bwd.cpp b/client_example/19_pool/avg_pool3d_bwd.cpp similarity index 100% rename from composable_kernel/client_example/19_pool/avg_pool3d_bwd.cpp rename to client_example/19_pool/avg_pool3d_bwd.cpp diff --git a/composable_kernel/client_example/19_pool/avg_pool3d_fwd.cpp b/client_example/19_pool/avg_pool3d_fwd.cpp similarity index 100% rename from composable_kernel/client_example/19_pool/avg_pool3d_fwd.cpp rename to client_example/19_pool/avg_pool3d_fwd.cpp diff --git a/composable_kernel/client_example/19_pool/max_pool2d_bwd.cpp b/client_example/19_pool/max_pool2d_bwd.cpp similarity index 100% rename from composable_kernel/client_example/19_pool/max_pool2d_bwd.cpp rename to client_example/19_pool/max_pool2d_bwd.cpp diff --git a/composable_kernel/client_example/19_pool/max_pool2d_fwd.cpp b/client_example/19_pool/max_pool2d_fwd.cpp similarity index 100% rename from composable_kernel/client_example/19_pool/max_pool2d_fwd.cpp rename to client_example/19_pool/max_pool2d_fwd.cpp diff --git a/composable_kernel/client_example/20_splitk_gemm/CMakeLists.txt b/client_example/20_splitk_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/20_splitk_gemm/CMakeLists.txt rename to client_example/20_splitk_gemm/CMakeLists.txt diff --git a/composable_kernel/client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp b/client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp similarity index 100% rename from composable_kernel/client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp rename to client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp diff --git a/composable_kernel/client_example/21_grouped_gemm_bias/CMakeLists.txt b/client_example/21_grouped_gemm_bias/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/21_grouped_gemm_bias/CMakeLists.txt rename to client_example/21_grouped_gemm_bias/CMakeLists.txt diff --git a/composable_kernel/client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp b/client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp similarity index 100% rename from composable_kernel/client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp rename to client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp diff --git a/composable_kernel/client_example/22_grouped_gemm/CMakeLists.txt b/client_example/22_grouped_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/22_grouped_gemm/CMakeLists.txt rename to client_example/22_grouped_gemm/CMakeLists.txt diff --git a/composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp similarity index 100% rename from composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp rename to client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp diff --git a/composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp similarity index 100% rename from composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp rename to client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp diff --git a/composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp similarity index 100% rename from composable_kernel/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp rename to client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp diff --git a/composable_kernel/client_example/22_im2col_col2im/CMakeLists.txt b/client_example/22_im2col_col2im/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/22_im2col_col2im/CMakeLists.txt rename to client_example/22_im2col_col2im/CMakeLists.txt diff --git a/composable_kernel/client_example/22_im2col_col2im/column_to_image.cpp b/client_example/22_im2col_col2im/column_to_image.cpp similarity index 100% rename from composable_kernel/client_example/22_im2col_col2im/column_to_image.cpp rename to client_example/22_im2col_col2im/column_to_image.cpp diff --git a/composable_kernel/client_example/22_im2col_col2im/image_to_column.cpp b/client_example/22_im2col_col2im/image_to_column.cpp similarity index 100% rename from composable_kernel/client_example/22_im2col_col2im/image_to_column.cpp rename to client_example/22_im2col_col2im/image_to_column.cpp diff --git a/composable_kernel/client_example/CMakeLists.txt b/client_example/CMakeLists.txt similarity index 100% rename from composable_kernel/client_example/CMakeLists.txt rename to client_example/CMakeLists.txt diff --git a/composable_kernel/client_example/README.md b/client_example/README.md similarity index 100% rename from composable_kernel/client_example/README.md rename to client_example/README.md diff --git a/composable_kernel/cmake/Analyzers.cmake b/cmake/Analyzers.cmake similarity index 100% rename from composable_kernel/cmake/Analyzers.cmake rename to cmake/Analyzers.cmake diff --git a/composable_kernel/cmake/ClangTidy.cmake b/cmake/ClangTidy.cmake similarity index 100% rename from composable_kernel/cmake/ClangTidy.cmake rename to cmake/ClangTidy.cmake diff --git a/composable_kernel/cmake/CppCheck.cmake b/cmake/CppCheck.cmake similarity index 100% rename from composable_kernel/cmake/CppCheck.cmake rename to cmake/CppCheck.cmake diff --git a/composable_kernel/cmake/DoxygenDoc.cmake b/cmake/DoxygenDoc.cmake similarity index 100% rename from composable_kernel/cmake/DoxygenDoc.cmake rename to cmake/DoxygenDoc.cmake diff --git a/composable_kernel/cmake/EnableCompilerWarnings.cmake b/cmake/EnableCompilerWarnings.cmake similarity index 100% rename from composable_kernel/cmake/EnableCompilerWarnings.cmake rename to cmake/EnableCompilerWarnings.cmake diff --git a/composable_kernel/cmake/TargetFlags.cmake b/cmake/TargetFlags.cmake similarity index 100% rename from composable_kernel/cmake/TargetFlags.cmake rename to cmake/TargetFlags.cmake diff --git a/composable_kernel/cmake/googletest.cmake b/cmake/googletest.cmake similarity index 100% rename from composable_kernel/cmake/googletest.cmake rename to cmake/googletest.cmake diff --git a/composable_kernel/dev-requirements.txt b/dev-requirements.txt similarity index 100% rename from composable_kernel/dev-requirements.txt rename to dev-requirements.txt diff --git a/composable_kernel/docs/API_Reference_Guide.rst b/docs/API_Reference_Guide.rst similarity index 100% rename from composable_kernel/docs/API_Reference_Guide.rst rename to docs/API_Reference_Guide.rst diff --git a/composable_kernel/docs/Contributors_Guide.rst b/docs/Contributors_Guide.rst similarity index 100% rename from composable_kernel/docs/Contributors_Guide.rst rename to docs/Contributors_Guide.rst diff --git a/composable_kernel/docs/Supported_Primitives_Guide.rst b/docs/Supported_Primitives_Guide.rst similarity index 100% rename from composable_kernel/docs/Supported_Primitives_Guide.rst rename to docs/Supported_Primitives_Guide.rst diff --git a/composable_kernel/docs/conf.py b/docs/conf.py similarity index 100% rename from composable_kernel/docs/conf.py rename to docs/conf.py diff --git a/composable_kernel/docs/data/ck_component.png b/docs/data/ck_component.png similarity index 100% rename from composable_kernel/docs/data/ck_component.png rename to docs/data/ck_component.png diff --git a/composable_kernel/docs/data/ck_layer.png b/docs/data/ck_layer.png similarity index 100% rename from composable_kernel/docs/data/ck_layer.png rename to docs/data/ck_layer.png diff --git a/composable_kernel/docs/dockerhub.rst b/docs/dockerhub.rst similarity index 100% rename from composable_kernel/docs/dockerhub.rst rename to docs/dockerhub.rst diff --git a/composable_kernel/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile similarity index 100% rename from composable_kernel/docs/doxygen/Doxyfile rename to docs/doxygen/Doxyfile diff --git a/composable_kernel/docs/index.rst b/docs/index.rst similarity index 100% rename from composable_kernel/docs/index.rst rename to docs/index.rst diff --git a/composable_kernel/docs/license.rst b/docs/license.rst similarity index 100% rename from composable_kernel/docs/license.rst rename to docs/license.rst diff --git a/composable_kernel/docs/refs.bib b/docs/refs.bib similarity index 100% rename from composable_kernel/docs/refs.bib rename to docs/refs.bib diff --git a/composable_kernel/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in similarity index 100% rename from composable_kernel/docs/sphinx/_toc.yml.in rename to docs/sphinx/_toc.yml.in diff --git a/composable_kernel/docs/sphinx/requirements.in b/docs/sphinx/requirements.in similarity index 100% rename from composable_kernel/docs/sphinx/requirements.in rename to docs/sphinx/requirements.in diff --git a/composable_kernel/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt similarity index 100% rename from composable_kernel/docs/sphinx/requirements.txt rename to docs/sphinx/requirements.txt diff --git a/composable_kernel/docs/tutorial_hello_world.rst b/docs/tutorial_hello_world.rst similarity index 100% rename from composable_kernel/docs/tutorial_hello_world.rst rename to docs/tutorial_hello_world.rst diff --git a/composable_kernel/example/01_gemm/CMakeLists.txt b/example/01_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/01_gemm/CMakeLists.txt rename to example/01_gemm/CMakeLists.txt diff --git a/composable_kernel/example/01_gemm/README.md b/example/01_gemm/README.md similarity index 100% rename from composable_kernel/example/01_gemm/README.md rename to example/01_gemm/README.md diff --git a/composable_kernel/example/01_gemm/common.hpp b/example/01_gemm/common.hpp similarity index 100% rename from composable_kernel/example/01_gemm/common.hpp rename to example/01_gemm/common.hpp diff --git a/composable_kernel/example/01_gemm/gemm_dl_fp16.cpp b/example/01_gemm/gemm_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_dl_fp16.cpp rename to example/01_gemm/gemm_dl_fp16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_dl_fp32.cpp b/example/01_gemm/gemm_dl_fp32.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_dl_fp32.cpp rename to example/01_gemm/gemm_dl_fp32.cpp diff --git a/composable_kernel/example/01_gemm/gemm_dl_int4.cpp b/example/01_gemm/gemm_dl_int4.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_dl_int4.cpp rename to example/01_gemm/gemm_dl_int4.cpp diff --git a/composable_kernel/example/01_gemm/gemm_dl_int8.cpp b/example/01_gemm/gemm_dl_int8.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_dl_int8.cpp rename to example/01_gemm/gemm_dl_int8.cpp diff --git a/composable_kernel/example/01_gemm/gemm_dpp_fp16.cpp b/example/01_gemm/gemm_dpp_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_dpp_fp16.cpp rename to example/01_gemm/gemm_dpp_fp16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_wmma_fp16.cpp b/example/01_gemm/gemm_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_wmma_fp16.cpp rename to example/01_gemm/gemm_wmma_fp16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_bf16.cpp b/example/01_gemm/gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_bf16.cpp rename to example/01_gemm/gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_bf16_rtn.cpp b/example/01_gemm/gemm_xdl_bf16_rtn.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_bf16_rtn.cpp rename to example/01_gemm/gemm_xdl_bf16_rtn.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_fp16.cpp b/example/01_gemm/gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_fp16.cpp rename to example/01_gemm/gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_fp16_fp8.cpp b/example/01_gemm/gemm_xdl_fp16_fp8.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_fp16_fp8.cpp rename to example/01_gemm/gemm_xdl_fp16_fp8.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_fp64.cpp b/example/01_gemm/gemm_xdl_fp64.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_fp64.cpp rename to example/01_gemm/gemm_xdl_fp64.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_fp8.cpp b/example/01_gemm/gemm_xdl_fp8.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_fp8.cpp rename to example/01_gemm/gemm_xdl_fp8.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_fp8_bf8.cpp b/example/01_gemm/gemm_xdl_fp8_bf8.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_fp8_bf8.cpp rename to example/01_gemm/gemm_xdl_fp8_bf8.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_int4.cpp b/example/01_gemm/gemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_int4.cpp rename to example/01_gemm/gemm_xdl_int4.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_int8.cpp b/example/01_gemm/gemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_int8.cpp rename to example/01_gemm/gemm_xdl_int8.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp b/example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp rename to example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_streamk.cpp b/example/01_gemm/gemm_xdl_streamk.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_streamk.cpp rename to example/01_gemm/gemm_xdl_streamk.cpp diff --git a/composable_kernel/example/01_gemm/gemm_xdl_wavelet_fp16.cpp b/example/01_gemm/gemm_xdl_wavelet_fp16.cpp similarity index 100% rename from composable_kernel/example/01_gemm/gemm_xdl_wavelet_fp16.cpp rename to example/01_gemm/gemm_xdl_wavelet_fp16.cpp diff --git a/composable_kernel/example/01_gemm/run_gemm_example.inc b/example/01_gemm/run_gemm_example.inc similarity index 100% rename from composable_kernel/example/01_gemm/run_gemm_example.inc rename to example/01_gemm/run_gemm_example.inc diff --git a/composable_kernel/example/02_gemm_bilinear/CMakeLists.txt b/example/02_gemm_bilinear/CMakeLists.txt similarity index 100% rename from composable_kernel/example/02_gemm_bilinear/CMakeLists.txt rename to example/02_gemm_bilinear/CMakeLists.txt diff --git a/composable_kernel/example/02_gemm_bilinear/README.md b/example/02_gemm_bilinear/README.md similarity index 100% rename from composable_kernel/example/02_gemm_bilinear/README.md rename to example/02_gemm_bilinear/README.md diff --git a/composable_kernel/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp rename to example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp diff --git a/composable_kernel/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp similarity index 100% rename from composable_kernel/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp rename to example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp diff --git a/composable_kernel/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp b/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp rename to example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp diff --git a/composable_kernel/example/03_gemm_bias_relu/CMakeLists.txt b/example/03_gemm_bias_relu/CMakeLists.txt similarity index 100% rename from composable_kernel/example/03_gemm_bias_relu/CMakeLists.txt rename to example/03_gemm_bias_relu/CMakeLists.txt diff --git a/composable_kernel/example/03_gemm_bias_relu/README.md b/example/03_gemm_bias_relu/README.md similarity index 100% rename from composable_kernel/example/03_gemm_bias_relu/README.md rename to example/03_gemm_bias_relu/README.md diff --git a/composable_kernel/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp b/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp rename to example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/CMakeLists.txt b/example/04_gemm_add_add_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/CMakeLists.txt rename to example/04_gemm_add_add_fastgelu/CMakeLists.txt diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/README.md b/example/04_gemm_add_add_fastgelu/README.md similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/README.md rename to example/04_gemm_add_add_fastgelu/README.md diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/common.hpp b/example/04_gemm_add_add_fastgelu/common.hpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/common.hpp rename to example/04_gemm_add_add_fastgelu/common.hpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp rename to example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp rename to example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp rename to example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp rename to example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp rename to example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp diff --git a/composable_kernel/example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc b/example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc similarity index 100% rename from composable_kernel/example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc rename to example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc diff --git a/composable_kernel/example/09_convnd_fwd/CMakeLists.txt b/example/09_convnd_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/09_convnd_fwd/CMakeLists.txt rename to example/09_convnd_fwd/CMakeLists.txt diff --git a/composable_kernel/example/09_convnd_fwd/README.md b/example/09_convnd_fwd/README.md similarity index 100% rename from composable_kernel/example/09_convnd_fwd/README.md rename to example/09_convnd_fwd/README.md diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_common.hpp b/example/09_convnd_fwd/convnd_fwd_common.hpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_common.hpp rename to example/09_convnd_fwd/convnd_fwd_common.hpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_common.hpp b/example/09_convnd_fwd/convnd_fwd_dl_common.hpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_common.hpp rename to example/09_convnd_fwd/convnd_fwd_dl_common.hpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp b/example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp rename to example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp b/example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp rename to example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_int8.cpp b/example/09_convnd_fwd/convnd_fwd_dl_int8.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_dl_int8.cpp rename to example/09_convnd_fwd/convnd_fwd_dl_int8.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp rename to example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp rename to example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp rename to example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp rename to example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp diff --git a/composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp rename to example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp diff --git a/composable_kernel/example/09_convnd_fwd/run_convnd_fwd_dl_example.inc b/example/09_convnd_fwd/run_convnd_fwd_dl_example.inc similarity index 100% rename from composable_kernel/example/09_convnd_fwd/run_convnd_fwd_dl_example.inc rename to example/09_convnd_fwd/run_convnd_fwd_dl_example.inc diff --git a/composable_kernel/example/09_convnd_fwd/run_convnd_fwd_example.inc b/example/09_convnd_fwd/run_convnd_fwd_example.inc similarity index 100% rename from composable_kernel/example/09_convnd_fwd/run_convnd_fwd_example.inc rename to example/09_convnd_fwd/run_convnd_fwd_example.inc diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt b/example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt rename to example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp rename to example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp diff --git a/composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc b/example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc similarity index 100% rename from composable_kernel/example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc rename to example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc diff --git a/composable_kernel/example/12_reduce/CMakeLists.txt b/example/12_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/example/12_reduce/CMakeLists.txt rename to example/12_reduce/CMakeLists.txt diff --git a/composable_kernel/example/12_reduce/README.md b/example/12_reduce/README.md similarity index 100% rename from composable_kernel/example/12_reduce/README.md rename to example/12_reduce/README.md diff --git a/composable_kernel/example/12_reduce/reduce_blockwise.cpp b/example/12_reduce/reduce_blockwise.cpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_blockwise.cpp rename to example/12_reduce/reduce_blockwise.cpp diff --git a/composable_kernel/example/12_reduce/reduce_blockwise_impl.hpp b/example/12_reduce/reduce_blockwise_impl.hpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_blockwise_impl.hpp rename to example/12_reduce/reduce_blockwise_impl.hpp diff --git a/composable_kernel/example/12_reduce/reduce_blockwise_two_call.cpp b/example/12_reduce/reduce_blockwise_two_call.cpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_blockwise_two_call.cpp rename to example/12_reduce/reduce_blockwise_two_call.cpp diff --git a/composable_kernel/example/12_reduce/reduce_example_common.hpp b/example/12_reduce/reduce_example_common.hpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_example_common.hpp rename to example/12_reduce/reduce_example_common.hpp diff --git a/composable_kernel/example/12_reduce/reduce_multiblock_atomic_add.cpp b/example/12_reduce/reduce_multiblock_atomic_add.cpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_multiblock_atomic_add.cpp rename to example/12_reduce/reduce_multiblock_atomic_add.cpp diff --git a/composable_kernel/example/12_reduce/reduce_multiblock_atomic_add_impl.hpp b/example/12_reduce/reduce_multiblock_atomic_add_impl.hpp similarity index 100% rename from composable_kernel/example/12_reduce/reduce_multiblock_atomic_add_impl.hpp rename to example/12_reduce/reduce_multiblock_atomic_add_impl.hpp diff --git a/composable_kernel/example/13_pool2d_fwd/CMakeLists.txt b/example/13_pool2d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/13_pool2d_fwd/CMakeLists.txt rename to example/13_pool2d_fwd/CMakeLists.txt diff --git a/composable_kernel/example/13_pool2d_fwd/README.md b/example/13_pool2d_fwd/README.md similarity index 100% rename from composable_kernel/example/13_pool2d_fwd/README.md rename to example/13_pool2d_fwd/README.md diff --git a/composable_kernel/example/13_pool2d_fwd/pool2d_fwd_common.hpp b/example/13_pool2d_fwd/pool2d_fwd_common.hpp similarity index 100% rename from composable_kernel/example/13_pool2d_fwd/pool2d_fwd_common.hpp rename to example/13_pool2d_fwd/pool2d_fwd_common.hpp diff --git a/composable_kernel/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp b/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp similarity index 100% rename from composable_kernel/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp rename to example/13_pool2d_fwd/pool2d_fwd_fp16.cpp diff --git a/composable_kernel/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp b/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp similarity index 100% rename from composable_kernel/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp rename to example/13_pool2d_fwd/pool2d_fwd_fp32.cpp diff --git a/composable_kernel/example/14_gemm_quantization/CMakeLists.txt b/example/14_gemm_quantization/CMakeLists.txt similarity index 100% rename from composable_kernel/example/14_gemm_quantization/CMakeLists.txt rename to example/14_gemm_quantization/CMakeLists.txt diff --git a/composable_kernel/example/14_gemm_quantization/gemm_dl_quantization_int8.cpp b/example/14_gemm_quantization/gemm_dl_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/14_gemm_quantization/gemm_dl_quantization_int8.cpp rename to example/14_gemm_quantization/gemm_dl_quantization_int8.cpp diff --git a/composable_kernel/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp b/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp rename to example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp diff --git a/composable_kernel/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp b/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp rename to example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp diff --git a/composable_kernel/example/15_grouped_gemm/CMakeLists.txt b/example/15_grouped_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/15_grouped_gemm/CMakeLists.txt rename to example/15_grouped_gemm/CMakeLists.txt diff --git a/composable_kernel/example/15_grouped_gemm/README.md b/example/15_grouped_gemm/README.md similarity index 100% rename from composable_kernel/example/15_grouped_gemm/README.md rename to example/15_grouped_gemm/README.md diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp rename to example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp diff --git a/composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp similarity index 100% rename from composable_kernel/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp rename to example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp diff --git a/composable_kernel/example/15_grouped_gemm/run_grouped_gemm_example.inc b/example/15_grouped_gemm/run_grouped_gemm_example.inc similarity index 100% rename from composable_kernel/example/15_grouped_gemm/run_grouped_gemm_example.inc rename to example/15_grouped_gemm/run_grouped_gemm_example.inc diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/CMakeLists.txt b/example/16_gemm_multi_d_multi_reduces/CMakeLists.txt similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/CMakeLists.txt rename to example/16_gemm_multi_d_multi_reduces/CMakeLists.txt diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp rename to example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp diff --git a/composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp b/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp similarity index 100% rename from composable_kernel/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp rename to example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp diff --git a/composable_kernel/example/17_convnd_bwd_data/CMakeLists.txt b/example/17_convnd_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/example/17_convnd_bwd_data/CMakeLists.txt rename to example/17_convnd_bwd_data/CMakeLists.txt diff --git a/composable_kernel/example/17_convnd_bwd_data/README.md b/example/17_convnd_bwd_data/README.md similarity index 100% rename from composable_kernel/example/17_convnd_bwd_data/README.md rename to example/17_convnd_bwd_data/README.md diff --git a/composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp b/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp similarity index 100% rename from composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp rename to example/17_convnd_bwd_data/convnd_bwd_data_common.hpp diff --git a/composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp b/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp rename to example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp diff --git a/composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp b/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp rename to example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp diff --git a/composable_kernel/example/18_batched_gemm_reduce/CMakeLists.txt b/example/18_batched_gemm_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/example/18_batched_gemm_reduce/CMakeLists.txt rename to example/18_batched_gemm_reduce/CMakeLists.txt diff --git a/composable_kernel/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp b/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp rename to example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp diff --git a/composable_kernel/example/19_binary_elementwise/CMakeLists.txt b/example/19_binary_elementwise/CMakeLists.txt similarity index 100% rename from composable_kernel/example/19_binary_elementwise/CMakeLists.txt rename to example/19_binary_elementwise/CMakeLists.txt diff --git a/composable_kernel/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp b/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp similarity index 100% rename from composable_kernel/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp rename to example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp diff --git a/composable_kernel/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp b/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp similarity index 100% rename from composable_kernel/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp rename to example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp diff --git a/composable_kernel/example/19_binary_elementwise/elementwise_add_1d.cpp b/example/19_binary_elementwise/elementwise_add_1d.cpp similarity index 100% rename from composable_kernel/example/19_binary_elementwise/elementwise_add_1d.cpp rename to example/19_binary_elementwise/elementwise_add_1d.cpp diff --git a/composable_kernel/example/19_binary_elementwise/elementwise_add_4d.cpp b/example/19_binary_elementwise/elementwise_add_4d.cpp similarity index 100% rename from composable_kernel/example/19_binary_elementwise/elementwise_add_4d.cpp rename to example/19_binary_elementwise/elementwise_add_4d.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/CMakeLists.txt b/example/20_grouped_conv_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/CMakeLists.txt rename to example/20_grouped_conv_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/common.hpp b/example/20_grouped_conv_bwd_weight/common.hpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/common.hpp rename to example/20_grouped_conv_bwd_weight/common.hpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp rename to example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp rename to example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp rename to example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp rename to example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp rename to example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp diff --git a/composable_kernel/example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc b/example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc similarity index 100% rename from composable_kernel/example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc rename to example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc diff --git a/composable_kernel/example/21_gemm_layernorm/CMakeLists.txt b/example/21_gemm_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/21_gemm_layernorm/CMakeLists.txt rename to example/21_gemm_layernorm/CMakeLists.txt diff --git a/composable_kernel/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp similarity index 100% rename from composable_kernel/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp rename to example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp diff --git a/composable_kernel/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp similarity index 100% rename from composable_kernel/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp rename to example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp diff --git a/composable_kernel/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp b/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp similarity index 100% rename from composable_kernel/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp rename to example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp diff --git a/composable_kernel/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp b/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp similarity index 100% rename from composable_kernel/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp rename to example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp diff --git a/composable_kernel/example/22_cgemm/CMakeLists.txt b/example/22_cgemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/22_cgemm/CMakeLists.txt rename to example/22_cgemm/CMakeLists.txt diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_bf16.cpp b/example/22_cgemm/cgemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_bf16.cpp rename to example/22_cgemm/cgemm_xdl_bf16.cpp diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_common.hpp b/example/22_cgemm/cgemm_xdl_common.hpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_common.hpp rename to example/22_cgemm/cgemm_xdl_common.hpp diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_fp16.cpp b/example/22_cgemm/cgemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_fp16.cpp rename to example/22_cgemm/cgemm_xdl_fp16.cpp diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_fp32.cpp b/example/22_cgemm/cgemm_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_fp32.cpp rename to example/22_cgemm/cgemm_xdl_fp32.cpp diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_int4.cpp b/example/22_cgemm/cgemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_int4.cpp rename to example/22_cgemm/cgemm_xdl_int4.cpp diff --git a/composable_kernel/example/22_cgemm/cgemm_xdl_int8.cpp b/example/22_cgemm/cgemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/22_cgemm/cgemm_xdl_int8.cpp rename to example/22_cgemm/cgemm_xdl_int8.cpp diff --git a/composable_kernel/example/23_softmax/CMakeLists.txt b/example/23_softmax/CMakeLists.txt similarity index 100% rename from composable_kernel/example/23_softmax/CMakeLists.txt rename to example/23_softmax/CMakeLists.txt diff --git a/composable_kernel/example/23_softmax/README.md b/example/23_softmax/README.md similarity index 100% rename from composable_kernel/example/23_softmax/README.md rename to example/23_softmax/README.md diff --git a/composable_kernel/example/23_softmax/softmax_blockwise.cpp b/example/23_softmax/softmax_blockwise.cpp similarity index 100% rename from composable_kernel/example/23_softmax/softmax_blockwise.cpp rename to example/23_softmax/softmax_blockwise.cpp diff --git a/composable_kernel/example/24_batched_gemm/CMakeLists.txt b/example/24_batched_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/24_batched_gemm/CMakeLists.txt rename to example/24_batched_gemm/CMakeLists.txt diff --git a/composable_kernel/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp b/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp rename to example/24_batched_gemm/batched_gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp rename to example/24_batched_gemm/batched_gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp rename to example/24_batched_gemm/batched_gemm_xdl_fp32.cpp diff --git a/composable_kernel/example/24_batched_gemm/batched_gemm_xdl_int4.cpp b/example/24_batched_gemm/batched_gemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/24_batched_gemm/batched_gemm_xdl_int4.cpp rename to example/24_batched_gemm/batched_gemm_xdl_int4.cpp diff --git a/composable_kernel/example/24_batched_gemm/batched_gemm_xdl_int8.cpp b/example/24_batched_gemm/batched_gemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/24_batched_gemm/batched_gemm_xdl_int8.cpp rename to example/24_batched_gemm/batched_gemm_xdl_int8.cpp diff --git a/composable_kernel/example/24_batched_gemm/run_batched_gemm_example.inc b/example/24_batched_gemm/run_batched_gemm_example.inc similarity index 100% rename from composable_kernel/example/24_batched_gemm/run_batched_gemm_example.inc rename to example/24_batched_gemm/run_batched_gemm_example.inc diff --git a/composable_kernel/example/25_gemm_bias_e_permute/CMakeLists.txt b/example/25_gemm_bias_e_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/25_gemm_bias_e_permute/CMakeLists.txt rename to example/25_gemm_bias_e_permute/CMakeLists.txt diff --git a/composable_kernel/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp rename to example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp diff --git a/composable_kernel/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp rename to example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp diff --git a/composable_kernel/example/26_contraction/CMakeLists.txt b/example/26_contraction/CMakeLists.txt similarity index 100% rename from composable_kernel/example/26_contraction/CMakeLists.txt rename to example/26_contraction/CMakeLists.txt diff --git a/composable_kernel/example/26_contraction/README.md b/example/26_contraction/README.md similarity index 100% rename from composable_kernel/example/26_contraction/README.md rename to example/26_contraction/README.md diff --git a/composable_kernel/example/26_contraction/contraction_bilinear_xdl_fp32.cpp b/example/26_contraction/contraction_bilinear_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/26_contraction/contraction_bilinear_xdl_fp32.cpp rename to example/26_contraction/contraction_bilinear_xdl_fp32.cpp diff --git a/composable_kernel/example/26_contraction/contraction_bilinear_xdl_fp64.cpp b/example/26_contraction/contraction_bilinear_xdl_fp64.cpp similarity index 100% rename from composable_kernel/example/26_contraction/contraction_bilinear_xdl_fp64.cpp rename to example/26_contraction/contraction_bilinear_xdl_fp64.cpp diff --git a/composable_kernel/example/26_contraction/contraction_scale_xdl_fp32.cpp b/example/26_contraction/contraction_scale_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/26_contraction/contraction_scale_xdl_fp32.cpp rename to example/26_contraction/contraction_scale_xdl_fp32.cpp diff --git a/composable_kernel/example/26_contraction/contraction_scale_xdl_fp64.cpp b/example/26_contraction/contraction_scale_xdl_fp64.cpp similarity index 100% rename from composable_kernel/example/26_contraction/contraction_scale_xdl_fp64.cpp rename to example/26_contraction/contraction_scale_xdl_fp64.cpp diff --git a/composable_kernel/example/27_layernorm/CMakeLists.txt b/example/27_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/27_layernorm/CMakeLists.txt rename to example/27_layernorm/CMakeLists.txt diff --git a/composable_kernel/example/27_layernorm/common.hpp b/example/27_layernorm/common.hpp similarity index 100% rename from composable_kernel/example/27_layernorm/common.hpp rename to example/27_layernorm/common.hpp diff --git a/composable_kernel/example/27_layernorm/layernorm_fp16.cpp b/example/27_layernorm/layernorm_fp16.cpp similarity index 100% rename from composable_kernel/example/27_layernorm/layernorm_fp16.cpp rename to example/27_layernorm/layernorm_fp16.cpp diff --git a/composable_kernel/example/27_layernorm/layernorm_splitk_fp16.cpp b/example/27_layernorm/layernorm_splitk_fp16.cpp similarity index 100% rename from composable_kernel/example/27_layernorm/layernorm_splitk_fp16.cpp rename to example/27_layernorm/layernorm_splitk_fp16.cpp diff --git a/composable_kernel/example/27_layernorm/run_layernorm_example.inc b/example/27_layernorm/run_layernorm_example.inc similarity index 100% rename from composable_kernel/example/27_layernorm/run_layernorm_example.inc rename to example/27_layernorm/run_layernorm_example.inc diff --git a/composable_kernel/example/28_grouped_gemm_bias_e_permute/CMakeLists.txt b/example/28_grouped_gemm_bias_e_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/28_grouped_gemm_bias_e_permute/CMakeLists.txt rename to example/28_grouped_gemm_bias_e_permute/CMakeLists.txt diff --git a/composable_kernel/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp b/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp rename to example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/29_batched_gemm_bias_e_permute/CMakeLists.txt b/example/29_batched_gemm_bias_e_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/29_batched_gemm_bias_e_permute/CMakeLists.txt rename to example/29_batched_gemm_bias_e_permute/CMakeLists.txt diff --git a/composable_kernel/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp rename to example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp diff --git a/composable_kernel/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp rename to example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt b/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt rename to example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/README.md b/example/30_grouped_conv_fwd_multiple_d/README.md similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/README.md rename to example/30_grouped_conv_fwd_multiple_d/README.md diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/common.hpp b/example/30_grouped_conv_fwd_multiple_d/common.hpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/common.hpp rename to example/30_grouped_conv_fwd_multiple_d/common.hpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp b/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp rename to example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp rename to example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc rename to example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc rename to example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc diff --git a/composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc similarity index 100% rename from composable_kernel/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc rename to example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc diff --git a/composable_kernel/example/31_batched_gemm_gemm/CMakeLists.txt b/example/31_batched_gemm_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/CMakeLists.txt rename to example/31_batched_gemm_gemm/CMakeLists.txt diff --git a/composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp rename to example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp rename to example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp rename to example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp diff --git a/composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp rename to example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp diff --git a/composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp rename to example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp diff --git a/composable_kernel/example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc b/example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc similarity index 100% rename from composable_kernel/example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc rename to example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt b/example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt rename to example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp rename to example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc b/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc rename to example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc b/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc rename to example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc diff --git a/composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc b/example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc similarity index 100% rename from composable_kernel/example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc rename to example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc diff --git a/composable_kernel/example/33_multiple_reduce/CMakeLists.txt b/example/33_multiple_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/example/33_multiple_reduce/CMakeLists.txt rename to example/33_multiple_reduce/CMakeLists.txt diff --git a/composable_kernel/example/33_multiple_reduce/README.md b/example/33_multiple_reduce/README.md similarity index 100% rename from composable_kernel/example/33_multiple_reduce/README.md rename to example/33_multiple_reduce/README.md diff --git a/composable_kernel/example/33_multiple_reduce/dual_reduce_common.hpp b/example/33_multiple_reduce/dual_reduce_common.hpp similarity index 100% rename from composable_kernel/example/33_multiple_reduce/dual_reduce_common.hpp rename to example/33_multiple_reduce/dual_reduce_common.hpp diff --git a/composable_kernel/example/33_multiple_reduce/dual_reduce_multiblock.cpp b/example/33_multiple_reduce/dual_reduce_multiblock.cpp similarity index 100% rename from composable_kernel/example/33_multiple_reduce/dual_reduce_multiblock.cpp rename to example/33_multiple_reduce/dual_reduce_multiblock.cpp diff --git a/composable_kernel/example/33_multiple_reduce/dual_reduce_threadwise.cpp b/example/33_multiple_reduce/dual_reduce_threadwise.cpp similarity index 100% rename from composable_kernel/example/33_multiple_reduce/dual_reduce_threadwise.cpp rename to example/33_multiple_reduce/dual_reduce_threadwise.cpp diff --git a/composable_kernel/example/34_batchnorm/CMakeLists.txt b/example/34_batchnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/34_batchnorm/CMakeLists.txt rename to example/34_batchnorm/CMakeLists.txt diff --git a/composable_kernel/example/34_batchnorm/README.md b/example/34_batchnorm/README.md similarity index 100% rename from composable_kernel/example/34_batchnorm/README.md rename to example/34_batchnorm/README.md diff --git a/composable_kernel/example/34_batchnorm/batchnorm_backward_nhwc.cpp b/example/34_batchnorm/batchnorm_backward_nhwc.cpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_backward_nhwc.cpp rename to example/34_batchnorm/batchnorm_backward_nhwc.cpp diff --git a/composable_kernel/example/34_batchnorm/batchnorm_common.hpp b/example/34_batchnorm/batchnorm_common.hpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_common.hpp rename to example/34_batchnorm/batchnorm_common.hpp diff --git a/composable_kernel/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp b/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp rename to example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp diff --git a/composable_kernel/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp b/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp rename to example/34_batchnorm/batchnorm_forward_training_nhwc.cpp diff --git a/composable_kernel/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp b/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp rename to example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp diff --git a/composable_kernel/example/34_batchnorm/batchnorm_infer_impl.hpp b/example/34_batchnorm/batchnorm_infer_impl.hpp similarity index 100% rename from composable_kernel/example/34_batchnorm/batchnorm_infer_impl.hpp rename to example/34_batchnorm/batchnorm_infer_impl.hpp diff --git a/composable_kernel/example/35_splitK_gemm/CMakeLists.txt b/example/35_splitK_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/35_splitK_gemm/CMakeLists.txt rename to example/35_splitK_gemm/CMakeLists.txt diff --git a/composable_kernel/example/35_splitK_gemm/run_splitK_gemm_example.inc b/example/35_splitK_gemm/run_splitK_gemm_example.inc similarity index 100% rename from composable_kernel/example/35_splitK_gemm/run_splitK_gemm_example.inc rename to example/35_splitK_gemm/run_splitK_gemm_example.inc diff --git a/composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp rename to example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp diff --git a/composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp rename to example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp diff --git a/composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp rename to example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp diff --git a/composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp rename to example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp diff --git a/composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp rename to example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp diff --git a/composable_kernel/example/36_sparse_embedding/CMakeLists.txt b/example/36_sparse_embedding/CMakeLists.txt similarity index 100% rename from composable_kernel/example/36_sparse_embedding/CMakeLists.txt rename to example/36_sparse_embedding/CMakeLists.txt diff --git a/composable_kernel/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp b/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp similarity index 100% rename from composable_kernel/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp rename to example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp diff --git a/composable_kernel/example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt b/example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt similarity index 100% rename from composable_kernel/example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt rename to example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt diff --git a/composable_kernel/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp b/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp rename to example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt b/example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt rename to example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/common.hpp b/example/38_grouped_conv_bwd_data_multiple_d/common.hpp similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/common.hpp rename to example/38_grouped_conv_bwd_data_multiple_d/common.hpp diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp rename to example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp rename to example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp rename to example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc b/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc rename to example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc diff --git a/composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc b/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc similarity index 100% rename from composable_kernel/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc rename to example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc diff --git a/composable_kernel/example/39_permute/CMakeLists.txt b/example/39_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/39_permute/CMakeLists.txt rename to example/39_permute/CMakeLists.txt diff --git a/composable_kernel/example/39_permute/common.hpp b/example/39_permute/common.hpp similarity index 100% rename from composable_kernel/example/39_permute/common.hpp rename to example/39_permute/common.hpp diff --git a/composable_kernel/example/39_permute/permute_1xHxW_fp16.cpp b/example/39_permute/permute_1xHxW_fp16.cpp similarity index 100% rename from composable_kernel/example/39_permute/permute_1xHxW_fp16.cpp rename to example/39_permute/permute_1xHxW_fp16.cpp diff --git a/composable_kernel/example/39_permute/permute_HxWx4_fp16.cpp b/example/39_permute/permute_HxWx4_fp16.cpp similarity index 100% rename from composable_kernel/example/39_permute/permute_HxWx4_fp16.cpp rename to example/39_permute/permute_HxWx4_fp16.cpp diff --git a/composable_kernel/example/39_permute/permute_NxHxW_fp16.cpp b/example/39_permute/permute_NxHxW_fp16.cpp similarity index 100% rename from composable_kernel/example/39_permute/permute_NxHxW_fp16.cpp rename to example/39_permute/permute_NxHxW_fp16.cpp diff --git a/composable_kernel/example/39_permute/run_permute_bundle_example.inc b/example/39_permute/run_permute_bundle_example.inc similarity index 100% rename from composable_kernel/example/39_permute/run_permute_bundle_example.inc rename to example/39_permute/run_permute_bundle_example.inc diff --git a/composable_kernel/example/39_permute/run_permute_element_example.inc b/example/39_permute/run_permute_element_example.inc similarity index 100% rename from composable_kernel/example/39_permute/run_permute_element_example.inc rename to example/39_permute/run_permute_element_example.inc diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/CMakeLists.txt b/example/40_conv2d_fwd_quantization/CMakeLists.txt similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/CMakeLists.txt rename to example/40_conv2d_fwd_quantization/CMakeLists.txt diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/common.hpp b/example/40_conv2d_fwd_quantization/common.hpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/common.hpp rename to example/40_conv2d_fwd_quantization/common.hpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp rename to example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc rename to example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc rename to example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc rename to example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc diff --git a/composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc similarity index 100% rename from composable_kernel/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc rename to example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/CMakeLists.txt b/example/41_grouped_conv_conv_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/CMakeLists.txt rename to example/41_grouped_conv_conv_fwd/CMakeLists.txt diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp rename to example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp rename to example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp rename to example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp rename to example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp rename to example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp diff --git a/composable_kernel/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc b/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc similarity index 100% rename from composable_kernel/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc rename to example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc diff --git a/composable_kernel/example/42_groupnorm/CMakeLists.txt b/example/42_groupnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/example/42_groupnorm/CMakeLists.txt rename to example/42_groupnorm/CMakeLists.txt diff --git a/composable_kernel/example/42_groupnorm/common.hpp b/example/42_groupnorm/common.hpp similarity index 100% rename from composable_kernel/example/42_groupnorm/common.hpp rename to example/42_groupnorm/common.hpp diff --git a/composable_kernel/example/42_groupnorm/groupnorm_sigmoid_mul_fp16.cpp b/example/42_groupnorm/groupnorm_sigmoid_mul_fp16.cpp similarity index 100% rename from composable_kernel/example/42_groupnorm/groupnorm_sigmoid_mul_fp16.cpp rename to example/42_groupnorm/groupnorm_sigmoid_mul_fp16.cpp diff --git a/composable_kernel/example/42_groupnorm/groupnorm_splitk_fp16.cpp b/example/42_groupnorm/groupnorm_splitk_fp16.cpp similarity index 100% rename from composable_kernel/example/42_groupnorm/groupnorm_splitk_fp16.cpp rename to example/42_groupnorm/groupnorm_splitk_fp16.cpp diff --git a/composable_kernel/example/42_groupnorm/groupnorm_swish_fp16.cpp b/example/42_groupnorm/groupnorm_swish_fp16.cpp similarity index 100% rename from composable_kernel/example/42_groupnorm/groupnorm_swish_fp16.cpp rename to example/42_groupnorm/groupnorm_swish_fp16.cpp diff --git a/composable_kernel/example/42_groupnorm/run_groupnorm_example.inc b/example/42_groupnorm/run_groupnorm_example.inc similarity index 100% rename from composable_kernel/example/42_groupnorm/run_groupnorm_example.inc rename to example/42_groupnorm/run_groupnorm_example.inc diff --git a/composable_kernel/example/43_splitk_gemm_bias_e_permute/CMakeLists.txt b/example/43_splitk_gemm_bias_e_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/43_splitk_gemm_bias_e_permute/CMakeLists.txt rename to example/43_splitk_gemm_bias_e_permute/CMakeLists.txt diff --git a/composable_kernel/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp rename to example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp diff --git a/composable_kernel/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp similarity index 100% rename from composable_kernel/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp rename to example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp diff --git a/composable_kernel/example/44_elementwise_permute/CMakeLists.txt b/example/44_elementwise_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/44_elementwise_permute/CMakeLists.txt rename to example/44_elementwise_permute/CMakeLists.txt diff --git a/composable_kernel/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp similarity index 100% rename from composable_kernel/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp rename to example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp diff --git a/composable_kernel/example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp similarity index 100% rename from composable_kernel/example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp rename to example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp diff --git a/composable_kernel/example/45_elementwise_normalization/CMakeLists.txt b/example/45_elementwise_normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/example/45_elementwise_normalization/CMakeLists.txt rename to example/45_elementwise_normalization/CMakeLists.txt diff --git a/composable_kernel/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp b/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp similarity index 100% rename from composable_kernel/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp rename to example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp diff --git a/composable_kernel/example/46_gemm_add_multiply/CMakeLists.txt b/example/46_gemm_add_multiply/CMakeLists.txt similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/CMakeLists.txt rename to example/46_gemm_add_multiply/CMakeLists.txt diff --git a/composable_kernel/example/46_gemm_add_multiply/README.md b/example/46_gemm_add_multiply/README.md similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/README.md rename to example/46_gemm_add_multiply/README.md diff --git a/composable_kernel/example/46_gemm_add_multiply/common.hpp b/example/46_gemm_add_multiply/common.hpp similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/common.hpp rename to example/46_gemm_add_multiply/common.hpp diff --git a/composable_kernel/example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp b/example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp rename to example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp diff --git a/composable_kernel/example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp b/example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp rename to example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp diff --git a/composable_kernel/example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc b/example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc similarity index 100% rename from composable_kernel/example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc rename to example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc diff --git a/composable_kernel/example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt b/example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt rename to example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt diff --git a/composable_kernel/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp b/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp similarity index 100% rename from composable_kernel/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp rename to example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp diff --git a/composable_kernel/example/48_pool3d_fwd/CMakeLists.txt b/example/48_pool3d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/48_pool3d_fwd/CMakeLists.txt rename to example/48_pool3d_fwd/CMakeLists.txt diff --git a/composable_kernel/example/48_pool3d_fwd/pool3d_fwd_common.hpp b/example/48_pool3d_fwd/pool3d_fwd_common.hpp similarity index 100% rename from composable_kernel/example/48_pool3d_fwd/pool3d_fwd_common.hpp rename to example/48_pool3d_fwd/pool3d_fwd_common.hpp diff --git a/composable_kernel/example/48_pool3d_fwd/pool3d_fwd_fp16.cpp b/example/48_pool3d_fwd/pool3d_fwd_fp16.cpp similarity index 100% rename from composable_kernel/example/48_pool3d_fwd/pool3d_fwd_fp16.cpp rename to example/48_pool3d_fwd/pool3d_fwd_fp16.cpp diff --git a/composable_kernel/example/49_maxpool2d_bwd/CMakeLists.txt b/example/49_maxpool2d_bwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/49_maxpool2d_bwd/CMakeLists.txt rename to example/49_maxpool2d_bwd/CMakeLists.txt diff --git a/composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp similarity index 100% rename from composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp rename to example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp diff --git a/composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp similarity index 100% rename from composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp rename to example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp diff --git a/composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp similarity index 100% rename from composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp rename to example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp diff --git a/composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp similarity index 100% rename from composable_kernel/example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp rename to example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp diff --git a/composable_kernel/example/50_put_element/CMakeLists.txt b/example/50_put_element/CMakeLists.txt similarity index 100% rename from composable_kernel/example/50_put_element/CMakeLists.txt rename to example/50_put_element/CMakeLists.txt diff --git a/composable_kernel/example/50_put_element/put_element_fp16.cpp b/example/50_put_element/put_element_fp16.cpp similarity index 100% rename from composable_kernel/example/50_put_element/put_element_fp16.cpp rename to example/50_put_element/put_element_fp16.cpp diff --git a/composable_kernel/example/51_avgpool3d_bwd/CMakeLists.txt b/example/51_avgpool3d_bwd/CMakeLists.txt similarity index 100% rename from composable_kernel/example/51_avgpool3d_bwd/CMakeLists.txt rename to example/51_avgpool3d_bwd/CMakeLists.txt diff --git a/composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp similarity index 100% rename from composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp rename to example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp diff --git a/composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp similarity index 100% rename from composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp rename to example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp diff --git a/composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp similarity index 100% rename from composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp rename to example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp diff --git a/composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp similarity index 100% rename from composable_kernel/example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp rename to example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp diff --git a/composable_kernel/example/52_im2col_col2im/CMakeLists.txt b/example/52_im2col_col2im/CMakeLists.txt similarity index 100% rename from composable_kernel/example/52_im2col_col2im/CMakeLists.txt rename to example/52_im2col_col2im/CMakeLists.txt diff --git a/composable_kernel/example/52_im2col_col2im/column_to_image_f32.cpp b/example/52_im2col_col2im/column_to_image_f32.cpp similarity index 100% rename from composable_kernel/example/52_im2col_col2im/column_to_image_f32.cpp rename to example/52_im2col_col2im/column_to_image_f32.cpp diff --git a/composable_kernel/example/52_im2col_col2im/common.hpp b/example/52_im2col_col2im/common.hpp similarity index 100% rename from composable_kernel/example/52_im2col_col2im/common.hpp rename to example/52_im2col_col2im/common.hpp diff --git a/composable_kernel/example/52_im2col_col2im/image_to_column_f32.cpp b/example/52_im2col_col2im/image_to_column_f32.cpp similarity index 100% rename from composable_kernel/example/52_im2col_col2im/image_to_column_f32.cpp rename to example/52_im2col_col2im/image_to_column_f32.cpp diff --git a/composable_kernel/example/53_gemv_splitk/CMakeLists.txt b/example/53_gemv_splitk/CMakeLists.txt similarity index 100% rename from composable_kernel/example/53_gemv_splitk/CMakeLists.txt rename to example/53_gemv_splitk/CMakeLists.txt diff --git a/composable_kernel/example/53_gemv_splitk/README.md b/example/53_gemv_splitk/README.md similarity index 100% rename from composable_kernel/example/53_gemv_splitk/README.md rename to example/53_gemv_splitk/README.md diff --git a/composable_kernel/example/53_gemv_splitk/common.hpp b/example/53_gemv_splitk/common.hpp similarity index 100% rename from composable_kernel/example/53_gemv_splitk/common.hpp rename to example/53_gemv_splitk/common.hpp diff --git a/composable_kernel/example/53_gemv_splitk/gemv_splitk_fp16.cpp b/example/53_gemv_splitk/gemv_splitk_fp16.cpp similarity index 100% rename from composable_kernel/example/53_gemv_splitk/gemv_splitk_fp16.cpp rename to example/53_gemv_splitk/gemv_splitk_fp16.cpp diff --git a/composable_kernel/example/53_gemv_splitk/run_gemv_splitk_example.inc b/example/53_gemv_splitk/run_gemv_splitk_example.inc similarity index 100% rename from composable_kernel/example/53_gemv_splitk/run_gemv_splitk_example.inc rename to example/53_gemv_splitk/run_gemv_splitk_example.inc diff --git a/composable_kernel/example/54_tall_and_skinny_gemm_splitk/CMakeLists.txt b/example/54_tall_and_skinny_gemm_splitk/CMakeLists.txt similarity index 100% rename from composable_kernel/example/54_tall_and_skinny_gemm_splitk/CMakeLists.txt rename to example/54_tall_and_skinny_gemm_splitk/CMakeLists.txt diff --git a/composable_kernel/example/54_tall_and_skinny_gemm_splitk/README.md b/example/54_tall_and_skinny_gemm_splitk/README.md similarity index 100% rename from composable_kernel/example/54_tall_and_skinny_gemm_splitk/README.md rename to example/54_tall_and_skinny_gemm_splitk/README.md diff --git a/composable_kernel/example/54_tall_and_skinny_gemm_splitk/common.hpp b/example/54_tall_and_skinny_gemm_splitk/common.hpp similarity index 100% rename from composable_kernel/example/54_tall_and_skinny_gemm_splitk/common.hpp rename to example/54_tall_and_skinny_gemm_splitk/common.hpp diff --git a/composable_kernel/example/54_tall_and_skinny_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc b/example/54_tall_and_skinny_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc similarity index 100% rename from composable_kernel/example/54_tall_and_skinny_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc rename to example/54_tall_and_skinny_gemm_splitk/run_tall_and_skinny_gemm_splitk_example.inc diff --git a/composable_kernel/example/54_tall_and_skinny_gemm_splitk/tall_and_skinny_gemm_splitk_fp16.cpp b/example/54_tall_and_skinny_gemm_splitk/tall_and_skinny_gemm_splitk_fp16.cpp similarity index 100% rename from composable_kernel/example/54_tall_and_skinny_gemm_splitk/tall_and_skinny_gemm_splitk_fp16.cpp rename to example/54_tall_and_skinny_gemm_splitk/tall_and_skinny_gemm_splitk_fp16.cpp diff --git a/composable_kernel/example/60_gemm_multi_ABD/CMakeLists.txt b/example/60_gemm_multi_ABD/CMakeLists.txt similarity index 100% rename from composable_kernel/example/60_gemm_multi_ABD/CMakeLists.txt rename to example/60_gemm_multi_ABD/CMakeLists.txt diff --git a/composable_kernel/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp rename to example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp diff --git a/composable_kernel/example/61_contraction_multi_ABD/CMakeLists.txt b/example/61_contraction_multi_ABD/CMakeLists.txt similarity index 100% rename from composable_kernel/example/61_contraction_multi_ABD/CMakeLists.txt rename to example/61_contraction_multi_ABD/CMakeLists.txt diff --git a/composable_kernel/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp b/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp similarity index 100% rename from composable_kernel/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp rename to example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/CMakeLists.txt b/example/62_conv_fwd_activ/CMakeLists.txt similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/CMakeLists.txt rename to example/62_conv_fwd_activ/CMakeLists.txt diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_activ_common.hpp b/example/62_conv_fwd_activ/convnd_fwd_activ_common.hpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_activ_common.hpp rename to example/62_conv_fwd_activ/convnd_fwd_activ_common.hpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_abs_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_abs_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_abs_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_abs_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_clippedrelu_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_clippedrelu_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_clippedrelu_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_clippedrelu_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_elu_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_elu_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_elu_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_elu_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_leakyrelu_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_leakyrelu_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_leakyrelu_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_leakyrelu_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_pow_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_pow_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_pow_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_pow_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_relu_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_sigmoid_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_softrelu_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp b/example/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp rename to example/62_conv_fwd_activ/convnd_fwd_xdl_tanh_fp16.cpp diff --git a/composable_kernel/example/62_conv_fwd_activ/run_convnd_fwd_activ_example.inc b/example/62_conv_fwd_activ/run_convnd_fwd_activ_example.inc similarity index 100% rename from composable_kernel/example/62_conv_fwd_activ/run_convnd_fwd_activ_example.inc rename to example/62_conv_fwd_activ/run_convnd_fwd_activ_example.inc diff --git a/composable_kernel/example/CMakeLists.txt b/example/CMakeLists.txt similarity index 100% rename from composable_kernel/example/CMakeLists.txt rename to example/CMakeLists.txt diff --git a/composable_kernel/include/ck/ck.hpp b/include/ck/ck.hpp similarity index 100% rename from composable_kernel/include/ck/ck.hpp rename to include/ck/ck.hpp diff --git a/composable_kernel/include/ck/config.h.in b/include/ck/config.h.in similarity index 100% rename from composable_kernel/include/ck/config.h.in rename to include/ck/config.h.in diff --git a/composable_kernel/include/ck/host_utility/device_prop.hpp b/include/ck/host_utility/device_prop.hpp similarity index 100% rename from composable_kernel/include/ck/host_utility/device_prop.hpp rename to include/ck/host_utility/device_prop.hpp diff --git a/composable_kernel/include/ck/host_utility/hip_check_error.hpp b/include/ck/host_utility/hip_check_error.hpp similarity index 100% rename from composable_kernel/include/ck/host_utility/hip_check_error.hpp rename to include/ck/host_utility/hip_check_error.hpp diff --git a/composable_kernel/include/ck/host_utility/io.hpp b/include/ck/host_utility/io.hpp similarity index 100% rename from composable_kernel/include/ck/host_utility/io.hpp rename to include/ck/host_utility/io.hpp diff --git a/composable_kernel/include/ck/host_utility/kernel_launch.hpp b/include/ck/host_utility/kernel_launch.hpp similarity index 100% rename from composable_kernel/include/ck/host_utility/kernel_launch.hpp rename to include/ck/host_utility/kernel_launch.hpp diff --git a/composable_kernel/include/ck/host_utility/stream_utility.hpp b/include/ck/host_utility/stream_utility.hpp similarity index 100% rename from composable_kernel/include/ck/host_utility/stream_utility.hpp rename to include/ck/host_utility/stream_utility.hpp diff --git a/composable_kernel/include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp b/include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp similarity index 100% rename from composable_kernel/include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp rename to include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp diff --git a/composable_kernel/include/ck/stream_config.hpp b/include/ck/stream_config.hpp similarity index 100% rename from composable_kernel/include/ck/stream_config.hpp rename to include/ck/stream_config.hpp diff --git a/composable_kernel/include/ck/tensor/static_tensor.hpp b/include/ck/tensor/static_tensor.hpp similarity index 100% rename from composable_kernel/include/ck/tensor/static_tensor.hpp rename to include/ck/tensor/static_tensor.hpp diff --git a/composable_kernel/include/ck/tensor_description/cluster_descriptor.hpp b/include/ck/tensor_description/cluster_descriptor.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/cluster_descriptor.hpp rename to include/ck/tensor_description/cluster_descriptor.hpp diff --git a/composable_kernel/include/ck/tensor_description/multi_index_transform.hpp b/include/ck/tensor_description/multi_index_transform.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/multi_index_transform.hpp rename to include/ck/tensor_description/multi_index_transform.hpp diff --git a/composable_kernel/include/ck/tensor_description/multi_index_transform_helper.hpp b/include/ck/tensor_description/multi_index_transform_helper.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/multi_index_transform_helper.hpp rename to include/ck/tensor_description/multi_index_transform_helper.hpp diff --git a/composable_kernel/include/ck/tensor_description/tensor_adaptor.hpp b/include/ck/tensor_description/tensor_adaptor.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/tensor_adaptor.hpp rename to include/ck/tensor_description/tensor_adaptor.hpp diff --git a/composable_kernel/include/ck/tensor_description/tensor_descriptor.hpp b/include/ck/tensor_description/tensor_descriptor.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/tensor_descriptor.hpp rename to include/ck/tensor_description/tensor_descriptor.hpp diff --git a/composable_kernel/include/ck/tensor_description/tensor_descriptor_helper.hpp b/include/ck/tensor_description/tensor_descriptor_helper.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/tensor_descriptor_helper.hpp rename to include/ck/tensor_description/tensor_descriptor_helper.hpp diff --git a/composable_kernel/include/ck/tensor_description/tensor_space_filling_curve.hpp b/include/ck/tensor_description/tensor_space_filling_curve.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_description/tensor_space_filling_curve.hpp rename to include/ck/tensor_description/tensor_space_filling_curve.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp b/include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp b/include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp b/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp rename to include/ck/tensor_operation/gpu/block/blockwise_welford.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp b/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp rename to include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp rename to include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp b/include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp rename to include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp rename to include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp rename to include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp rename to include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp b/include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp rename to include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_base.hpp b/include/ck/tensor_operation/gpu/device/device_base.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_base.hpp rename to include/ck/tensor_operation/gpu/device/device_base.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp rename to include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp rename to include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp rename to include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp rename to include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_cgemm.hpp b/include/ck/tensor_operation/gpu/device/device_cgemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_cgemm.hpp rename to include/ck/tensor_operation/gpu/device/device_cgemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp b/include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp rename to include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp b/include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp rename to include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp rename to include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp rename to include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp rename to include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp b/include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp rename to include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_elementwise.hpp b/include/ck/tensor_operation/gpu/device/device_elementwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_elementwise.hpp rename to include/ck/tensor_operation/gpu/device/device_elementwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp b/include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp rename to include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp rename to include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp rename to include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp b/include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp rename to include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp rename to include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_normalization.hpp b/include/ck/tensor_operation/gpu/device/device_normalization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_normalization.hpp rename to include/ck/tensor_operation/gpu/device/device_normalization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_permute.hpp b/include/ck/tensor_operation/gpu/device/device_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_permute.hpp rename to include/ck/tensor_operation/gpu/device/device_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp rename to include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_put_element.hpp b/include/ck/tensor_operation/gpu/device/device_put_element.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_put_element.hpp rename to include/ck/tensor_operation/gpu/device/device_put_element.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_reduce.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_reduce.hpp rename to include/ck/tensor_operation/gpu/device/device_reduce.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_softmax.hpp b/include/ck/tensor_operation/gpu/device/device_softmax.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_softmax.hpp rename to include/ck/tensor_operation/gpu/device/device_softmax.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp rename to include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/device_tall_and_skinny_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_tall_and_skinny_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/device_tall_and_skinny_gemm.hpp rename to include/ck/tensor_operation/gpu/device/device_tall_and_skinny_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/gemm_specialization.hpp b/include/ck/tensor_operation/gpu/device/gemm_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/gemm_specialization.hpp rename to include/ck/tensor_operation/gpu/device/gemm_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp b/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp b/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_normalization_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp b/include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp rename to include/ck/tensor_operation/gpu/device/impl/device_tall_and_skinny_gemm_splitk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/masking_specialization.hpp b/include/ck/tensor_operation/gpu/device/masking_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/masking_specialization.hpp rename to include/ck/tensor_operation/gpu/device/masking_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/matrix_padder.hpp b/include/ck/tensor_operation/gpu/device/matrix_padder.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/matrix_padder.hpp rename to include/ck/tensor_operation/gpu/device/matrix_padder.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp b/include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp rename to include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/tensor_layout.hpp b/include/ck/tensor_operation/gpu/device/tensor_layout.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/tensor_layout.hpp rename to include/ck/tensor_operation/gpu/device/tensor_layout.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/tensor_specialization.hpp b/include/ck/tensor_operation/gpu/device/tensor_specialization.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/tensor_specialization.hpp rename to include/ck/tensor_operation/gpu/device/tensor_specialization.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/device/welford_helper.hpp b/include/ck/tensor_operation/gpu/device/welford_helper.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/device/welford_helper.hpp rename to include/ck/tensor_operation/gpu/device/welford_helper.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp rename to include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp rename to include/ck/tensor_operation/gpu/element/element_wise_operation.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/element/quantization_operation.hpp b/include/ck/tensor_operation/gpu/element/quantization_operation.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/element/quantization_operation.hpp rename to include/ck/tensor_operation/gpu/element/quantization_operation.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp rename to include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp rename to include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp rename to include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp rename to include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp rename to include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp rename to include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp b/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp rename to include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp b/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp rename to include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_gemv_splitk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_tall_and_skinny_gemm_splitk.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp rename to include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp rename to include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp rename to include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp rename to include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp rename to include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp rename to include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp b/include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp rename to include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp rename to include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp b/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp rename to include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp b/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp rename to include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp b/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp rename to include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp b/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp rename to include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp diff --git a/composable_kernel/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp b/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp rename to include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp diff --git a/composable_kernel/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp b/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp similarity index 100% rename from composable_kernel/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp rename to include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp diff --git a/composable_kernel/include/ck/utility/amd_address_space.hpp b/include/ck/utility/amd_address_space.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_address_space.hpp rename to include/ck/utility/amd_address_space.hpp diff --git a/composable_kernel/include/ck/utility/amd_buffer_addressing.hpp b/include/ck/utility/amd_buffer_addressing.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_buffer_addressing.hpp rename to include/ck/utility/amd_buffer_addressing.hpp diff --git a/composable_kernel/include/ck/utility/amd_gemm_dpp.hpp b/include/ck/utility/amd_gemm_dpp.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_gemm_dpp.hpp rename to include/ck/utility/amd_gemm_dpp.hpp diff --git a/composable_kernel/include/ck/utility/amd_inline_asm.hpp b/include/ck/utility/amd_inline_asm.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_inline_asm.hpp rename to include/ck/utility/amd_inline_asm.hpp diff --git a/composable_kernel/include/ck/utility/amd_wave_read_first_lane.hpp b/include/ck/utility/amd_wave_read_first_lane.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_wave_read_first_lane.hpp rename to include/ck/utility/amd_wave_read_first_lane.hpp diff --git a/composable_kernel/include/ck/utility/amd_wmma.hpp b/include/ck/utility/amd_wmma.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_wmma.hpp rename to include/ck/utility/amd_wmma.hpp diff --git a/composable_kernel/include/ck/utility/amd_xdlops.hpp b/include/ck/utility/amd_xdlops.hpp similarity index 100% rename from composable_kernel/include/ck/utility/amd_xdlops.hpp rename to include/ck/utility/amd_xdlops.hpp diff --git a/composable_kernel/include/ck/utility/array.hpp b/include/ck/utility/array.hpp similarity index 100% rename from composable_kernel/include/ck/utility/array.hpp rename to include/ck/utility/array.hpp diff --git a/composable_kernel/include/ck/utility/array_multi_index.hpp b/include/ck/utility/array_multi_index.hpp similarity index 100% rename from composable_kernel/include/ck/utility/array_multi_index.hpp rename to include/ck/utility/array_multi_index.hpp diff --git a/composable_kernel/include/ck/utility/c_style_pointer_cast.hpp b/include/ck/utility/c_style_pointer_cast.hpp similarity index 100% rename from composable_kernel/include/ck/utility/c_style_pointer_cast.hpp rename to include/ck/utility/c_style_pointer_cast.hpp diff --git a/composable_kernel/include/ck/utility/common_header.hpp b/include/ck/utility/common_header.hpp similarity index 100% rename from composable_kernel/include/ck/utility/common_header.hpp rename to include/ck/utility/common_header.hpp diff --git a/composable_kernel/include/ck/utility/container_element_picker.hpp b/include/ck/utility/container_element_picker.hpp similarity index 100% rename from composable_kernel/include/ck/utility/container_element_picker.hpp rename to include/ck/utility/container_element_picker.hpp diff --git a/composable_kernel/include/ck/utility/container_helper.hpp b/include/ck/utility/container_helper.hpp similarity index 100% rename from composable_kernel/include/ck/utility/container_helper.hpp rename to include/ck/utility/container_helper.hpp diff --git a/composable_kernel/include/ck/utility/data_type.hpp b/include/ck/utility/data_type.hpp similarity index 100% rename from composable_kernel/include/ck/utility/data_type.hpp rename to include/ck/utility/data_type.hpp diff --git a/composable_kernel/include/ck/utility/debug.hpp b/include/ck/utility/debug.hpp similarity index 100% rename from composable_kernel/include/ck/utility/debug.hpp rename to include/ck/utility/debug.hpp diff --git a/composable_kernel/include/ck/utility/dynamic_buffer.hpp b/include/ck/utility/dynamic_buffer.hpp similarity index 100% rename from composable_kernel/include/ck/utility/dynamic_buffer.hpp rename to include/ck/utility/dynamic_buffer.hpp diff --git a/composable_kernel/include/ck/utility/enable_if.hpp b/include/ck/utility/enable_if.hpp similarity index 100% rename from composable_kernel/include/ck/utility/enable_if.hpp rename to include/ck/utility/enable_if.hpp diff --git a/composable_kernel/include/ck/utility/f8_utils.hpp b/include/ck/utility/f8_utils.hpp similarity index 100% rename from composable_kernel/include/ck/utility/f8_utils.hpp rename to include/ck/utility/f8_utils.hpp diff --git a/composable_kernel/include/ck/utility/functional.hpp b/include/ck/utility/functional.hpp similarity index 100% rename from composable_kernel/include/ck/utility/functional.hpp rename to include/ck/utility/functional.hpp diff --git a/composable_kernel/include/ck/utility/functional2.hpp b/include/ck/utility/functional2.hpp similarity index 100% rename from composable_kernel/include/ck/utility/functional2.hpp rename to include/ck/utility/functional2.hpp diff --git a/composable_kernel/include/ck/utility/functional3.hpp b/include/ck/utility/functional3.hpp similarity index 100% rename from composable_kernel/include/ck/utility/functional3.hpp rename to include/ck/utility/functional3.hpp diff --git a/composable_kernel/include/ck/utility/functional4.hpp b/include/ck/utility/functional4.hpp similarity index 100% rename from composable_kernel/include/ck/utility/functional4.hpp rename to include/ck/utility/functional4.hpp diff --git a/composable_kernel/include/ck/utility/generic_memory_space_atomic.hpp b/include/ck/utility/generic_memory_space_atomic.hpp similarity index 100% rename from composable_kernel/include/ck/utility/generic_memory_space_atomic.hpp rename to include/ck/utility/generic_memory_space_atomic.hpp diff --git a/composable_kernel/include/ck/utility/get_id.hpp b/include/ck/utility/get_id.hpp similarity index 100% rename from composable_kernel/include/ck/utility/get_id.hpp rename to include/ck/utility/get_id.hpp diff --git a/composable_kernel/include/ck/utility/get_shift.hpp b/include/ck/utility/get_shift.hpp similarity index 100% rename from composable_kernel/include/ck/utility/get_shift.hpp rename to include/ck/utility/get_shift.hpp diff --git a/composable_kernel/include/ck/utility/ignore.hpp b/include/ck/utility/ignore.hpp similarity index 100% rename from composable_kernel/include/ck/utility/ignore.hpp rename to include/ck/utility/ignore.hpp diff --git a/composable_kernel/include/ck/utility/inner_product.hpp b/include/ck/utility/inner_product.hpp similarity index 100% rename from composable_kernel/include/ck/utility/inner_product.hpp rename to include/ck/utility/inner_product.hpp diff --git a/composable_kernel/include/ck/utility/inner_product_dpp8.hpp b/include/ck/utility/inner_product_dpp8.hpp similarity index 100% rename from composable_kernel/include/ck/utility/inner_product_dpp8.hpp rename to include/ck/utility/inner_product_dpp8.hpp diff --git a/composable_kernel/include/ck/utility/integral_constant.hpp b/include/ck/utility/integral_constant.hpp similarity index 100% rename from composable_kernel/include/ck/utility/integral_constant.hpp rename to include/ck/utility/integral_constant.hpp diff --git a/composable_kernel/include/ck/utility/is_detected.hpp b/include/ck/utility/is_detected.hpp similarity index 100% rename from composable_kernel/include/ck/utility/is_detected.hpp rename to include/ck/utility/is_detected.hpp diff --git a/composable_kernel/include/ck/utility/is_known_at_compile_time.hpp b/include/ck/utility/is_known_at_compile_time.hpp similarity index 100% rename from composable_kernel/include/ck/utility/is_known_at_compile_time.hpp rename to include/ck/utility/is_known_at_compile_time.hpp diff --git a/composable_kernel/include/ck/utility/loop_scheduler.hpp b/include/ck/utility/loop_scheduler.hpp similarity index 100% rename from composable_kernel/include/ck/utility/loop_scheduler.hpp rename to include/ck/utility/loop_scheduler.hpp diff --git a/composable_kernel/include/ck/utility/magic_division.hpp b/include/ck/utility/magic_division.hpp similarity index 100% rename from composable_kernel/include/ck/utility/magic_division.hpp rename to include/ck/utility/magic_division.hpp diff --git a/composable_kernel/include/ck/utility/math.hpp b/include/ck/utility/math.hpp similarity index 100% rename from composable_kernel/include/ck/utility/math.hpp rename to include/ck/utility/math.hpp diff --git a/composable_kernel/include/ck/utility/math_v2.hpp b/include/ck/utility/math_v2.hpp similarity index 100% rename from composable_kernel/include/ck/utility/math_v2.hpp rename to include/ck/utility/math_v2.hpp diff --git a/composable_kernel/include/ck/utility/multi_index.hpp b/include/ck/utility/multi_index.hpp similarity index 100% rename from composable_kernel/include/ck/utility/multi_index.hpp rename to include/ck/utility/multi_index.hpp diff --git a/composable_kernel/include/ck/utility/number.hpp b/include/ck/utility/number.hpp similarity index 100% rename from composable_kernel/include/ck/utility/number.hpp rename to include/ck/utility/number.hpp diff --git a/composable_kernel/include/ck/utility/random_gen.hpp b/include/ck/utility/random_gen.hpp similarity index 100% rename from composable_kernel/include/ck/utility/random_gen.hpp rename to include/ck/utility/random_gen.hpp diff --git a/composable_kernel/include/ck/utility/reduction_common.hpp b/include/ck/utility/reduction_common.hpp similarity index 100% rename from composable_kernel/include/ck/utility/reduction_common.hpp rename to include/ck/utility/reduction_common.hpp diff --git a/composable_kernel/include/ck/utility/reduction_enums.hpp b/include/ck/utility/reduction_enums.hpp similarity index 100% rename from composable_kernel/include/ck/utility/reduction_enums.hpp rename to include/ck/utility/reduction_enums.hpp diff --git a/composable_kernel/include/ck/utility/reduction_functions_accumulate.hpp b/include/ck/utility/reduction_functions_accumulate.hpp similarity index 100% rename from composable_kernel/include/ck/utility/reduction_functions_accumulate.hpp rename to include/ck/utility/reduction_functions_accumulate.hpp diff --git a/composable_kernel/include/ck/utility/reduction_operator.hpp b/include/ck/utility/reduction_operator.hpp similarity index 100% rename from composable_kernel/include/ck/utility/reduction_operator.hpp rename to include/ck/utility/reduction_operator.hpp diff --git a/composable_kernel/include/ck/utility/sequence.hpp b/include/ck/utility/sequence.hpp similarity index 100% rename from composable_kernel/include/ck/utility/sequence.hpp rename to include/ck/utility/sequence.hpp diff --git a/composable_kernel/include/ck/utility/sequence_helper.hpp b/include/ck/utility/sequence_helper.hpp similarity index 100% rename from composable_kernel/include/ck/utility/sequence_helper.hpp rename to include/ck/utility/sequence_helper.hpp diff --git a/composable_kernel/include/ck/utility/span.hpp b/include/ck/utility/span.hpp similarity index 100% rename from composable_kernel/include/ck/utility/span.hpp rename to include/ck/utility/span.hpp diff --git a/composable_kernel/include/ck/utility/static_buffer.hpp b/include/ck/utility/static_buffer.hpp similarity index 100% rename from composable_kernel/include/ck/utility/static_buffer.hpp rename to include/ck/utility/static_buffer.hpp diff --git a/composable_kernel/include/ck/utility/statically_indexed_array.hpp b/include/ck/utility/statically_indexed_array.hpp similarity index 100% rename from composable_kernel/include/ck/utility/statically_indexed_array.hpp rename to include/ck/utility/statically_indexed_array.hpp diff --git a/composable_kernel/include/ck/utility/statically_indexed_array_multi_index.hpp b/include/ck/utility/statically_indexed_array_multi_index.hpp similarity index 100% rename from composable_kernel/include/ck/utility/statically_indexed_array_multi_index.hpp rename to include/ck/utility/statically_indexed_array_multi_index.hpp diff --git a/composable_kernel/include/ck/utility/synchronization.hpp b/include/ck/utility/synchronization.hpp similarity index 100% rename from composable_kernel/include/ck/utility/synchronization.hpp rename to include/ck/utility/synchronization.hpp diff --git a/composable_kernel/include/ck/utility/thread_group.hpp b/include/ck/utility/thread_group.hpp similarity index 100% rename from composable_kernel/include/ck/utility/thread_group.hpp rename to include/ck/utility/thread_group.hpp diff --git a/composable_kernel/include/ck/utility/transpose_vectors.hpp b/include/ck/utility/transpose_vectors.hpp similarity index 100% rename from composable_kernel/include/ck/utility/transpose_vectors.hpp rename to include/ck/utility/transpose_vectors.hpp diff --git a/composable_kernel/include/ck/utility/tuple.hpp b/include/ck/utility/tuple.hpp similarity index 100% rename from composable_kernel/include/ck/utility/tuple.hpp rename to include/ck/utility/tuple.hpp diff --git a/composable_kernel/include/ck/utility/tuple_helper.hpp b/include/ck/utility/tuple_helper.hpp similarity index 100% rename from composable_kernel/include/ck/utility/tuple_helper.hpp rename to include/ck/utility/tuple_helper.hpp diff --git a/composable_kernel/include/ck/utility/type.hpp b/include/ck/utility/type.hpp similarity index 100% rename from composable_kernel/include/ck/utility/type.hpp rename to include/ck/utility/type.hpp diff --git a/composable_kernel/include/ck/utility/type_convert.hpp b/include/ck/utility/type_convert.hpp similarity index 100% rename from composable_kernel/include/ck/utility/type_convert.hpp rename to include/ck/utility/type_convert.hpp diff --git a/composable_kernel/include/ck/utility/workgroup_barrier.hpp b/include/ck/utility/workgroup_barrier.hpp similarity index 100% rename from composable_kernel/include/ck/utility/workgroup_barrier.hpp rename to include/ck/utility/workgroup_barrier.hpp diff --git a/composable_kernel/include/ck/utility/workgroup_synchronization.hpp b/include/ck/utility/workgroup_synchronization.hpp similarity index 100% rename from composable_kernel/include/ck/utility/workgroup_synchronization.hpp rename to include/ck/utility/workgroup_synchronization.hpp diff --git a/composable_kernel/include/ck/version.h.in b/include/ck/version.h.in similarity index 100% rename from composable_kernel/include/ck/version.h.in rename to include/ck/version.h.in diff --git a/composable_kernel/library/CMakeLists.txt b/library/CMakeLists.txt similarity index 100% rename from composable_kernel/library/CMakeLists.txt rename to library/CMakeLists.txt diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp rename to library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp diff --git a/composable_kernel/library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp b/library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp rename to library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp b/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp rename to library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp b/library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp rename to library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp b/library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemv_splitk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemv_splitk.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/gemv_splitk.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/gemv_splitk.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv2d_fwd_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv2d_fwd_wmma_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv2d_fwd_wmma_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv2d_fwd_wmma_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/image_to_column.hpp b/library/include/ck/library/tensor_operation_instance/gpu/image_to_column.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/image_to_column.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/image_to_column.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/normalization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/normalization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/normalization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/normalization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp b/library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/normalization_swish.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp diff --git a/composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk.hpp rename to library/include/ck/library/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk.hpp diff --git a/composable_kernel/library/include/ck/library/utility/algorithm.hpp b/library/include/ck/library/utility/algorithm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/algorithm.hpp rename to library/include/ck/library/utility/algorithm.hpp diff --git a/composable_kernel/library/include/ck/library/utility/check_err.hpp b/library/include/ck/library/utility/check_err.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/check_err.hpp rename to library/include/ck/library/utility/check_err.hpp diff --git a/composable_kernel/library/include/ck/library/utility/conv_common.hpp b/library/include/ck/library/utility/conv_common.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/conv_common.hpp rename to library/include/ck/library/utility/conv_common.hpp diff --git a/composable_kernel/library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp b/library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp rename to library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp diff --git a/composable_kernel/library/include/ck/library/utility/convolution_parameter.hpp b/library/include/ck/library/utility/convolution_parameter.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/convolution_parameter.hpp rename to library/include/ck/library/utility/convolution_parameter.hpp diff --git a/composable_kernel/library/include/ck/library/utility/device_memory.hpp b/library/include/ck/library/utility/device_memory.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/device_memory.hpp rename to library/include/ck/library/utility/device_memory.hpp diff --git a/composable_kernel/library/include/ck/library/utility/fill.hpp b/library/include/ck/library/utility/fill.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/fill.hpp rename to library/include/ck/library/utility/fill.hpp diff --git a/composable_kernel/library/include/ck/library/utility/host_common_util.hpp b/library/include/ck/library/utility/host_common_util.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/host_common_util.hpp rename to library/include/ck/library/utility/host_common_util.hpp diff --git a/composable_kernel/library/include/ck/library/utility/host_gemm.hpp b/library/include/ck/library/utility/host_gemm.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/host_gemm.hpp rename to library/include/ck/library/utility/host_gemm.hpp diff --git a/composable_kernel/library/include/ck/library/utility/host_tensor.hpp b/library/include/ck/library/utility/host_tensor.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/host_tensor.hpp rename to library/include/ck/library/utility/host_tensor.hpp diff --git a/composable_kernel/library/include/ck/library/utility/host_tensor_generator.hpp b/library/include/ck/library/utility/host_tensor_generator.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/host_tensor_generator.hpp rename to library/include/ck/library/utility/host_tensor_generator.hpp diff --git a/composable_kernel/library/include/ck/library/utility/iterator.hpp b/library/include/ck/library/utility/iterator.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/iterator.hpp rename to library/include/ck/library/utility/iterator.hpp diff --git a/composable_kernel/library/include/ck/library/utility/literals.hpp b/library/include/ck/library/utility/literals.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/literals.hpp rename to library/include/ck/library/utility/literals.hpp diff --git a/composable_kernel/library/include/ck/library/utility/numeric.hpp b/library/include/ck/library/utility/numeric.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/numeric.hpp rename to library/include/ck/library/utility/numeric.hpp diff --git a/composable_kernel/library/include/ck/library/utility/ranges.hpp b/library/include/ck/library/utility/ranges.hpp similarity index 100% rename from composable_kernel/library/include/ck/library/utility/ranges.hpp rename to library/include/ck/library/utility/ranges.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp rename to library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp rename to library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp rename to library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_1d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_1d_instance.cpp rename to library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_1d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_2d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_2d_instance.cpp rename to library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_2d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_3d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_3d_instance.cpp rename to library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwc_3d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp rename to library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp b/library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp rename to library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemv_splitk/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/gemv_splitk/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/gemv_splitk/device_gemv_splitk_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp rename to library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_1d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_2d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_2d_instance.cpp rename to library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_2d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp rename to library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwc_3d_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp rename to library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/normalization/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_f32_f32_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_groupnorm_swish_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_layernorm2d_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/normalization/device_layernorm4d_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp b/library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp rename to library/src/tensor_operation_instance/gpu/normalization/normalization_instance_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp rename to library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp rename to library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp rename to library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp rename to library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/CMakeLists.txt rename to library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/CMakeLists.txt diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_kn_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_kn_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_kn_mn_instance.cpp diff --git a/composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_nk_mn_instance.cpp similarity index 100% rename from composable_kernel/library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_nk_mn_instance.cpp rename to library/src/tensor_operation_instance/gpu/tall_and_skinny_gemm_splitk/device_tall_and_skinny_gemm_splitk_f16_f16_f16_mk_nk_mn_instance.cpp diff --git a/composable_kernel/library/src/utility/CMakeLists.txt b/library/src/utility/CMakeLists.txt similarity index 100% rename from composable_kernel/library/src/utility/CMakeLists.txt rename to library/src/utility/CMakeLists.txt diff --git a/composable_kernel/library/src/utility/convolution_parameter.cpp b/library/src/utility/convolution_parameter.cpp similarity index 100% rename from composable_kernel/library/src/utility/convolution_parameter.cpp rename to library/src/utility/convolution_parameter.cpp diff --git a/composable_kernel/library/src/utility/device_memory.cpp b/library/src/utility/device_memory.cpp similarity index 100% rename from composable_kernel/library/src/utility/device_memory.cpp rename to library/src/utility/device_memory.cpp diff --git a/composable_kernel/library/src/utility/host_tensor.cpp b/library/src/utility/host_tensor.cpp similarity index 100% rename from composable_kernel/library/src/utility/host_tensor.cpp rename to library/src/utility/host_tensor.cpp diff --git a/composable_kernel/profiler/CMakeLists.txt b/profiler/CMakeLists.txt similarity index 100% rename from composable_kernel/profiler/CMakeLists.txt rename to profiler/CMakeLists.txt diff --git a/composable_kernel/profiler/README.md b/profiler/README.md similarity index 100% rename from composable_kernel/profiler/README.md rename to profiler/README.md diff --git a/composable_kernel/profiler/include/profiler/data_type_enum.hpp b/profiler/include/profiler/data_type_enum.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/data_type_enum.hpp rename to profiler/include/profiler/data_type_enum.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp b/profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp rename to profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp b/profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp b/profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp b/profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp b/profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp rename to profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batchnorm_backward_impl.hpp b/profiler/include/profiler/profile_batchnorm_backward_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batchnorm_backward_impl.hpp rename to profiler/include/profiler/profile_batchnorm_backward_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batchnorm_forward_impl.hpp b/profiler/include/profiler/profile_batchnorm_forward_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batchnorm_forward_impl.hpp rename to profiler/include/profiler/profile_batchnorm_forward_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_batchnorm_infer_impl.hpp b/profiler/include/profiler/profile_batchnorm_infer_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_batchnorm_infer_impl.hpp rename to profiler/include/profiler/profile_batchnorm_infer_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_contraction_impl.hpp b/profiler/include/profiler/profile_contraction_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_contraction_impl.hpp rename to profiler/include/profiler/profile_contraction_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_contraction_utils.hpp b/profiler/include/profiler/profile_contraction_utils.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_contraction_utils.hpp rename to profiler/include/profiler/profile_contraction_utils.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_conv_bwd_data_impl.hpp b/profiler/include/profiler/profile_conv_bwd_data_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_conv_bwd_data_impl.hpp rename to profiler/include/profiler/profile_conv_bwd_data_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp b/profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp rename to profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp b/profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp rename to profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_conv_fwd_impl.hpp b/profiler/include/profiler/profile_conv_fwd_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_conv_fwd_impl.hpp rename to profiler/include/profiler/profile_conv_fwd_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp b/profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp rename to profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_elementwise_layernorm_impl.hpp b/profiler/include/profiler/profile_elementwise_layernorm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_elementwise_layernorm_impl.hpp rename to profiler/include/profiler/profile_elementwise_layernorm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp rename to profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp rename to profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_add_multiply_impl.hpp b/profiler/include/profiler/profile_gemm_add_multiply_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_add_multiply_impl.hpp rename to profiler/include/profiler/profile_gemm_add_multiply_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp b/profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp rename to profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp b/profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp rename to profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_bilinear_impl.hpp b/profiler/include/profiler/profile_gemm_bilinear_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_bilinear_impl.hpp rename to profiler/include/profiler/profile_gemm_bilinear_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_fastgelu_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_fastgelu_impl.hpp rename to profiler/include/profiler/profile_gemm_fastgelu_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_impl.hpp b/profiler/include/profiler/profile_gemm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_impl.hpp rename to profiler/include/profiler/profile_gemm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_multiply_add_impl.hpp b/profiler/include/profiler/profile_gemm_multiply_add_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_multiply_add_impl.hpp rename to profiler/include/profiler/profile_gemm_multiply_add_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_reduce_impl.hpp b/profiler/include/profiler/profile_gemm_reduce_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_reduce_impl.hpp rename to profiler/include/profiler/profile_gemm_reduce_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_splitk_impl.hpp b/profiler/include/profiler/profile_gemm_splitk_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_splitk_impl.hpp rename to profiler/include/profiler/profile_gemm_splitk_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemm_streamk_impl.hpp b/profiler/include/profiler/profile_gemm_streamk_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemm_streamk_impl.hpp rename to profiler/include/profiler/profile_gemm_streamk_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_gemv_splitk_impl.hpp b/profiler/include/profiler/profile_gemv_splitk_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_gemv_splitk_impl.hpp rename to profiler/include/profiler/profile_gemv_splitk_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp b/profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp rename to profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp b/profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp rename to profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp b/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp rename to profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp rename to profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_grouped_gemm_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_grouped_gemm_impl.hpp rename to profiler/include/profiler/profile_grouped_gemm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_groupnorm_impl.hpp b/profiler/include/profiler/profile_groupnorm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_groupnorm_impl.hpp rename to profiler/include/profiler/profile_groupnorm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_layernorm_impl.hpp b/profiler/include/profiler/profile_layernorm_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_layernorm_impl.hpp rename to profiler/include/profiler/profile_layernorm_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp b/profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp rename to profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_pool3d_fwd_impl.hpp b/profiler/include/profiler/profile_pool3d_fwd_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_pool3d_fwd_impl.hpp rename to profiler/include/profiler/profile_pool3d_fwd_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_reduce_impl.hpp b/profiler/include/profiler/profile_reduce_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_reduce_impl.hpp rename to profiler/include/profiler/profile_reduce_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_softmax_impl.hpp b/profiler/include/profiler/profile_softmax_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_softmax_impl.hpp rename to profiler/include/profiler/profile_softmax_impl.hpp diff --git a/composable_kernel/profiler/include/profiler/profile_tall_and_skinny_gemm_splitk_impl.hpp b/profiler/include/profiler/profile_tall_and_skinny_gemm_splitk_impl.hpp similarity index 100% rename from composable_kernel/profiler/include/profiler/profile_tall_and_skinny_gemm_splitk_impl.hpp rename to profiler/include/profiler/profile_tall_and_skinny_gemm_splitk_impl.hpp diff --git a/composable_kernel/profiler/src/CMakeLists.txt b/profiler/src/CMakeLists.txt similarity index 100% rename from composable_kernel/profiler/src/CMakeLists.txt rename to profiler/src/CMakeLists.txt diff --git a/composable_kernel/profiler/src/profile_avg_pool3d_bwd.cpp b/profiler/src/profile_avg_pool3d_bwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_avg_pool3d_bwd.cpp rename to profiler/src/profile_avg_pool3d_bwd.cpp diff --git a/composable_kernel/profiler/src/profile_batched_gemm.cpp b/profiler/src/profile_batched_gemm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batched_gemm.cpp rename to profiler/src/profile_batched_gemm.cpp diff --git a/composable_kernel/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp b/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp rename to profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp diff --git a/composable_kernel/profiler/src/profile_batched_gemm_gemm.cpp b/profiler/src/profile_batched_gemm_gemm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batched_gemm_gemm.cpp rename to profiler/src/profile_batched_gemm_gemm.cpp diff --git a/composable_kernel/profiler/src/profile_batched_gemm_multi_d.cpp b/profiler/src/profile_batched_gemm_multi_d.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batched_gemm_multi_d.cpp rename to profiler/src/profile_batched_gemm_multi_d.cpp diff --git a/composable_kernel/profiler/src/profile_batched_gemm_reduce.cpp b/profiler/src/profile_batched_gemm_reduce.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batched_gemm_reduce.cpp rename to profiler/src/profile_batched_gemm_reduce.cpp diff --git a/composable_kernel/profiler/src/profile_batchnorm_bwd.cpp b/profiler/src/profile_batchnorm_bwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batchnorm_bwd.cpp rename to profiler/src/profile_batchnorm_bwd.cpp diff --git a/composable_kernel/profiler/src/profile_batchnorm_fwd.cpp b/profiler/src/profile_batchnorm_fwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batchnorm_fwd.cpp rename to profiler/src/profile_batchnorm_fwd.cpp diff --git a/composable_kernel/profiler/src/profile_batchnorm_infer.cpp b/profiler/src/profile_batchnorm_infer.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_batchnorm_infer.cpp rename to profiler/src/profile_batchnorm_infer.cpp diff --git a/composable_kernel/profiler/src/profile_contraction_bilinear.cpp b/profiler/src/profile_contraction_bilinear.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_contraction_bilinear.cpp rename to profiler/src/profile_contraction_bilinear.cpp diff --git a/composable_kernel/profiler/src/profile_contraction_scale.cpp b/profiler/src/profile_contraction_scale.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_contraction_scale.cpp rename to profiler/src/profile_contraction_scale.cpp diff --git a/composable_kernel/profiler/src/profile_conv_bwd_data.cpp b/profiler/src/profile_conv_bwd_data.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_conv_bwd_data.cpp rename to profiler/src/profile_conv_bwd_data.cpp diff --git a/composable_kernel/profiler/src/profile_conv_fwd.cpp b/profiler/src/profile_conv_fwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_conv_fwd.cpp rename to profiler/src/profile_conv_fwd.cpp diff --git a/composable_kernel/profiler/src/profile_conv_fwd_bias_relu.cpp b/profiler/src/profile_conv_fwd_bias_relu.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_conv_fwd_bias_relu.cpp rename to profiler/src/profile_conv_fwd_bias_relu.cpp diff --git a/composable_kernel/profiler/src/profile_conv_fwd_bias_relu_add.cpp b/profiler/src/profile_conv_fwd_bias_relu_add.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_conv_fwd_bias_relu_add.cpp rename to profiler/src/profile_conv_fwd_bias_relu_add.cpp diff --git a/composable_kernel/profiler/src/profile_conv_tensor_rearrange.cpp b/profiler/src/profile_conv_tensor_rearrange.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_conv_tensor_rearrange.cpp rename to profiler/src/profile_conv_tensor_rearrange.cpp diff --git a/composable_kernel/profiler/src/profile_gemm.cpp b/profiler/src/profile_gemm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm.cpp rename to profiler/src/profile_gemm.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_add_add_fastgelu.cpp b/profiler/src/profile_gemm_add_add_fastgelu.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_add_add_fastgelu.cpp rename to profiler/src/profile_gemm_add_add_fastgelu.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_add_fastgelu.cpp b/profiler/src/profile_gemm_add_fastgelu.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_add_fastgelu.cpp rename to profiler/src/profile_gemm_add_fastgelu.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_add_multiply.cpp b/profiler/src/profile_gemm_add_multiply.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_add_multiply.cpp rename to profiler/src/profile_gemm_add_multiply.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_add_relu_add_layernorm.cpp b/profiler/src/profile_gemm_add_relu_add_layernorm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_add_relu_add_layernorm.cpp rename to profiler/src/profile_gemm_add_relu_add_layernorm.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_bias_add_reduce.cpp b/profiler/src/profile_gemm_bias_add_reduce.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_bias_add_reduce.cpp rename to profiler/src/profile_gemm_bias_add_reduce.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_bilinear.cpp b/profiler/src/profile_gemm_bilinear.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_bilinear.cpp rename to profiler/src/profile_gemm_bilinear.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_fastgelu.cpp b/profiler/src/profile_gemm_fastgelu.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_fastgelu.cpp rename to profiler/src/profile_gemm_fastgelu.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_multiply_add.cpp b/profiler/src/profile_gemm_multiply_add.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_multiply_add.cpp rename to profiler/src/profile_gemm_multiply_add.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_reduce.cpp b/profiler/src/profile_gemm_reduce.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_reduce.cpp rename to profiler/src/profile_gemm_reduce.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_splitk.cpp b/profiler/src/profile_gemm_splitk.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_splitk.cpp rename to profiler/src/profile_gemm_splitk.cpp diff --git a/composable_kernel/profiler/src/profile_gemm_streamk.cpp b/profiler/src/profile_gemm_streamk.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemm_streamk.cpp rename to profiler/src/profile_gemm_streamk.cpp diff --git a/composable_kernel/profiler/src/profile_gemv_splitk.cpp b/profiler/src/profile_gemv_splitk.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_gemv_splitk.cpp rename to profiler/src/profile_gemv_splitk.cpp diff --git a/composable_kernel/profiler/src/profile_grouped_conv_bwd_data.cpp b/profiler/src/profile_grouped_conv_bwd_data.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_grouped_conv_bwd_data.cpp rename to profiler/src/profile_grouped_conv_bwd_data.cpp diff --git a/composable_kernel/profiler/src/profile_grouped_conv_bwd_weight.cpp b/profiler/src/profile_grouped_conv_bwd_weight.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_grouped_conv_bwd_weight.cpp rename to profiler/src/profile_grouped_conv_bwd_weight.cpp diff --git a/composable_kernel/profiler/src/profile_grouped_conv_fwd.cpp b/profiler/src/profile_grouped_conv_fwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_grouped_conv_fwd.cpp rename to profiler/src/profile_grouped_conv_fwd.cpp diff --git a/composable_kernel/profiler/src/profile_grouped_gemm.cpp b/profiler/src/profile_grouped_gemm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_grouped_gemm.cpp rename to profiler/src/profile_grouped_gemm.cpp diff --git a/composable_kernel/profiler/src/profile_grouped_gemm_fastgelu.cpp b/profiler/src/profile_grouped_gemm_fastgelu.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_grouped_gemm_fastgelu.cpp rename to profiler/src/profile_grouped_gemm_fastgelu.cpp diff --git a/composable_kernel/profiler/src/profile_groupnorm.cpp b/profiler/src/profile_groupnorm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_groupnorm.cpp rename to profiler/src/profile_groupnorm.cpp diff --git a/composable_kernel/profiler/src/profile_layernorm.cpp b/profiler/src/profile_layernorm.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_layernorm.cpp rename to profiler/src/profile_layernorm.cpp diff --git a/composable_kernel/profiler/src/profile_max_pool3d_bwd.cpp b/profiler/src/profile_max_pool3d_bwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_max_pool3d_bwd.cpp rename to profiler/src/profile_max_pool3d_bwd.cpp diff --git a/composable_kernel/profiler/src/profile_max_pool3d_fwd.cpp b/profiler/src/profile_max_pool3d_fwd.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_max_pool3d_fwd.cpp rename to profiler/src/profile_max_pool3d_fwd.cpp diff --git a/composable_kernel/profiler/src/profile_reduce.cpp b/profiler/src/profile_reduce.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_reduce.cpp rename to profiler/src/profile_reduce.cpp diff --git a/composable_kernel/profiler/src/profile_softmax.cpp b/profiler/src/profile_softmax.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_softmax.cpp rename to profiler/src/profile_softmax.cpp diff --git a/composable_kernel/profiler/src/profile_tall_and_skinny_gemm_splitk.cpp b/profiler/src/profile_tall_and_skinny_gemm_splitk.cpp similarity index 100% rename from composable_kernel/profiler/src/profile_tall_and_skinny_gemm_splitk.cpp rename to profiler/src/profile_tall_and_skinny_gemm_splitk.cpp diff --git a/composable_kernel/profiler/src/profiler.cpp b/profiler/src/profiler.cpp similarity index 100% rename from composable_kernel/profiler/src/profiler.cpp rename to profiler/src/profiler.cpp diff --git a/composable_kernel/profiler/src/profiler_operation_registry.hpp b/profiler/src/profiler_operation_registry.hpp similarity index 100% rename from composable_kernel/profiler/src/profiler_operation_registry.hpp rename to profiler/src/profiler_operation_registry.hpp diff --git a/composable_kernel/rbuild.ini b/rbuild.ini similarity index 100% rename from composable_kernel/rbuild.ini rename to rbuild.ini diff --git a/composable_kernel/requirements.txt b/requirements.txt similarity index 100% rename from composable_kernel/requirements.txt rename to requirements.txt diff --git a/composable_kernel/script/check_copyright_year.sh b/script/check_copyright_year.sh similarity index 100% rename from composable_kernel/script/check_copyright_year.sh rename to script/check_copyright_year.sh diff --git a/composable_kernel/script/clang-format-overwrite.sh b/script/clang-format-overwrite.sh similarity index 100% rename from composable_kernel/script/clang-format-overwrite.sh rename to script/clang-format-overwrite.sh diff --git a/composable_kernel/script/cmake-ck-dev.sh b/script/cmake-ck-dev.sh similarity index 100% rename from composable_kernel/script/cmake-ck-dev.sh rename to script/cmake-ck-dev.sh diff --git a/composable_kernel/script/cmake-ck-release.sh b/script/cmake-ck-release.sh similarity index 100% rename from composable_kernel/script/cmake-ck-release.sh rename to script/cmake-ck-release.sh diff --git a/composable_kernel/script/count_vgpr.sh b/script/count_vgpr.sh similarity index 100% rename from composable_kernel/script/count_vgpr.sh rename to script/count_vgpr.sh diff --git a/composable_kernel/script/hipclang_opt.sh b/script/hipclang_opt.sh similarity index 100% rename from composable_kernel/script/hipclang_opt.sh rename to script/hipclang_opt.sh diff --git a/composable_kernel/script/install_precommit.sh b/script/install_precommit.sh similarity index 100% rename from composable_kernel/script/install_precommit.sh rename to script/install_precommit.sh diff --git a/composable_kernel/script/parse_perf_data.py b/script/parse_perf_data.py similarity index 100% rename from composable_kernel/script/parse_perf_data.py rename to script/parse_perf_data.py diff --git a/composable_kernel/script/process_perf_data.py b/script/process_perf_data.py similarity index 100% rename from composable_kernel/script/process_perf_data.py rename to script/process_perf_data.py diff --git a/composable_kernel/script/process_perf_data.sh b/script/process_perf_data.sh similarity index 100% rename from composable_kernel/script/process_perf_data.sh rename to script/process_perf_data.sh diff --git a/composable_kernel/script/process_qa_data.sh b/script/process_qa_data.sh similarity index 100% rename from composable_kernel/script/process_qa_data.sh rename to script/process_qa_data.sh diff --git a/composable_kernel/script/profile_batched_gemm.sh b/script/profile_batched_gemm.sh similarity index 100% rename from composable_kernel/script/profile_batched_gemm.sh rename to script/profile_batched_gemm.sh diff --git a/composable_kernel/script/profile_conv_bwd_data.sh b/script/profile_conv_bwd_data.sh similarity index 100% rename from composable_kernel/script/profile_conv_bwd_data.sh rename to script/profile_conv_bwd_data.sh diff --git a/composable_kernel/script/profile_conv_fwd.sh b/script/profile_conv_fwd.sh similarity index 100% rename from composable_kernel/script/profile_conv_fwd.sh rename to script/profile_conv_fwd.sh diff --git a/composable_kernel/script/profile_gemm.sh b/script/profile_gemm.sh similarity index 100% rename from composable_kernel/script/profile_gemm.sh rename to script/profile_gemm.sh diff --git a/composable_kernel/script/profile_gemm_bilinear.sh b/script/profile_gemm_bilinear.sh similarity index 100% rename from composable_kernel/script/profile_gemm_bilinear.sh rename to script/profile_gemm_bilinear.sh diff --git a/composable_kernel/script/profile_grouped_gemm.sh b/script/profile_grouped_gemm.sh similarity index 100% rename from composable_kernel/script/profile_grouped_gemm.sh rename to script/profile_grouped_gemm.sh diff --git a/composable_kernel/script/profile_onnx_gemm.sh b/script/profile_onnx_gemm.sh similarity index 100% rename from composable_kernel/script/profile_onnx_gemm.sh rename to script/profile_onnx_gemm.sh diff --git a/composable_kernel/script/profile_reduce_no_index.sh b/script/profile_reduce_no_index.sh similarity index 100% rename from composable_kernel/script/profile_reduce_no_index.sh rename to script/profile_reduce_no_index.sh diff --git a/composable_kernel/script/profile_reduce_with_index.sh b/script/profile_reduce_with_index.sh similarity index 100% rename from composable_kernel/script/profile_reduce_with_index.sh rename to script/profile_reduce_with_index.sh diff --git a/composable_kernel/script/profile_resnet50.sh b/script/profile_resnet50.sh similarity index 100% rename from composable_kernel/script/profile_resnet50.sh rename to script/profile_resnet50.sh diff --git a/composable_kernel/script/profile_splitK_gemm.sh b/script/profile_splitK_gemm.sh similarity index 100% rename from composable_kernel/script/profile_splitK_gemm.sh rename to script/profile_splitK_gemm.sh diff --git a/composable_kernel/script/run_full_performance_tests.sh b/script/run_full_performance_tests.sh similarity index 100% rename from composable_kernel/script/run_full_performance_tests.sh rename to script/run_full_performance_tests.sh diff --git a/composable_kernel/script/run_performance_tests.sh b/script/run_performance_tests.sh similarity index 100% rename from composable_kernel/script/run_performance_tests.sh rename to script/run_performance_tests.sh diff --git a/composable_kernel/script/test_convnd_fwd.sh b/script/test_convnd_fwd.sh similarity index 100% rename from composable_kernel/script/test_convnd_fwd.sh rename to script/test_convnd_fwd.sh diff --git a/composable_kernel/script/test_reduce_no_index.sh b/script/test_reduce_no_index.sh similarity index 100% rename from composable_kernel/script/test_reduce_no_index.sh rename to script/test_reduce_no_index.sh diff --git a/composable_kernel/script/test_reduce_with_index.sh b/script/test_reduce_with_index.sh similarity index 100% rename from composable_kernel/script/test_reduce_with_index.sh rename to script/test_reduce_with_index.sh diff --git a/composable_kernel/script/uninstall_precommit.sh b/script/uninstall_precommit.sh similarity index 100% rename from composable_kernel/script/uninstall_precommit.sh rename to script/uninstall_precommit.sh diff --git a/composable_kernel/test/CMakeLists.txt b/test/CMakeLists.txt similarity index 100% rename from composable_kernel/test/CMakeLists.txt rename to test/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm/CMakeLists.txt b/test/batched_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm/CMakeLists.txt rename to test/batched_gemm/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm/batched_gemm_bf16.cpp b/test/batched_gemm/batched_gemm_bf16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm/batched_gemm_bf16.cpp rename to test/batched_gemm/batched_gemm_bf16.cpp diff --git a/composable_kernel/test/batched_gemm/batched_gemm_fp16.cpp b/test/batched_gemm/batched_gemm_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm/batched_gemm_fp16.cpp rename to test/batched_gemm/batched_gemm_fp16.cpp diff --git a/composable_kernel/test/batched_gemm/batched_gemm_fp32.cpp b/test/batched_gemm/batched_gemm_fp32.cpp similarity index 100% rename from composable_kernel/test/batched_gemm/batched_gemm_fp32.cpp rename to test/batched_gemm/batched_gemm_fp32.cpp diff --git a/composable_kernel/test/batched_gemm/batched_gemm_int8.cpp b/test/batched_gemm/batched_gemm_int8.cpp similarity index 100% rename from composable_kernel/test/batched_gemm/batched_gemm_int8.cpp rename to test/batched_gemm/batched_gemm_int8.cpp diff --git a/composable_kernel/test/batched_gemm/test_batched_gemm.cpp b/test/batched_gemm/test_batched_gemm.cpp similarity index 100% rename from composable_kernel/test/batched_gemm/test_batched_gemm.cpp rename to test/batched_gemm/test_batched_gemm.cpp diff --git a/composable_kernel/test/batched_gemm_gemm/CMakeLists.txt b/test/batched_gemm_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm_gemm/CMakeLists.txt rename to test/batched_gemm_gemm/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp b/test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp rename to test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp diff --git a/composable_kernel/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp b/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp similarity index 100% rename from composable_kernel/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp rename to test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp diff --git a/composable_kernel/test/batched_gemm_multi_d/CMakeLists.txt b/test/batched_gemm_multi_d/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm_multi_d/CMakeLists.txt rename to test/batched_gemm_multi_d/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp b/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp rename to test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp diff --git a/composable_kernel/test/batched_gemm_reduce/CMakeLists.txt b/test/batched_gemm_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm_reduce/CMakeLists.txt rename to test/batched_gemm_reduce/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp b/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp rename to test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm/CMakeLists.txt b/test/batched_gemm_softmax_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm/CMakeLists.txt rename to test/batched_gemm_softmax_gemm/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp b/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp rename to test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp b/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp rename to test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt b/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt rename to test/batched_gemm_softmax_gemm_permute/CMakeLists.txt diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp diff --git a/composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp similarity index 100% rename from composable_kernel/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp rename to test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp diff --git a/composable_kernel/test/batchnorm/CMakeLists.txt b/test/batchnorm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/batchnorm/CMakeLists.txt rename to test/batchnorm/CMakeLists.txt diff --git a/composable_kernel/test/batchnorm/batchnorm_bwd_rank_4.cpp b/test/batchnorm/batchnorm_bwd_rank_4.cpp similarity index 100% rename from composable_kernel/test/batchnorm/batchnorm_bwd_rank_4.cpp rename to test/batchnorm/batchnorm_bwd_rank_4.cpp diff --git a/composable_kernel/test/batchnorm/batchnorm_fwd_rank_4.cpp b/test/batchnorm/batchnorm_fwd_rank_4.cpp similarity index 100% rename from composable_kernel/test/batchnorm/batchnorm_fwd_rank_4.cpp rename to test/batchnorm/batchnorm_fwd_rank_4.cpp diff --git a/composable_kernel/test/batchnorm/batchnorm_infer_rank_4.cpp b/test/batchnorm/batchnorm_infer_rank_4.cpp similarity index 100% rename from composable_kernel/test/batchnorm/batchnorm_infer_rank_4.cpp rename to test/batchnorm/batchnorm_infer_rank_4.cpp diff --git a/composable_kernel/test/block_swizzle_test/block_swizzle_test.cpp b/test/block_swizzle_test/block_swizzle_test.cpp similarity index 100% rename from composable_kernel/test/block_swizzle_test/block_swizzle_test.cpp rename to test/block_swizzle_test/block_swizzle_test.cpp diff --git a/composable_kernel/test/block_swizzle_test/rebuild.sh b/test/block_swizzle_test/rebuild.sh similarity index 100% rename from composable_kernel/test/block_swizzle_test/rebuild.sh rename to test/block_swizzle_test/rebuild.sh diff --git a/composable_kernel/test/block_swizzle_test/simple_args.h b/test/block_swizzle_test/simple_args.h similarity index 100% rename from composable_kernel/test/block_swizzle_test/simple_args.h rename to test/block_swizzle_test/simple_args.h diff --git a/composable_kernel/test/block_to_ctile_map/CMakeLists.txt b/test/block_to_ctile_map/CMakeLists.txt similarity index 100% rename from composable_kernel/test/block_to_ctile_map/CMakeLists.txt rename to test/block_to_ctile_map/CMakeLists.txt diff --git a/composable_kernel/test/block_to_ctile_map/test_block_to_ctile_map.cpp b/test/block_to_ctile_map/test_block_to_ctile_map.cpp similarity index 100% rename from composable_kernel/test/block_to_ctile_map/test_block_to_ctile_map.cpp rename to test/block_to_ctile_map/test_block_to_ctile_map.cpp diff --git a/composable_kernel/test/contraction/CMakeLists.txt b/test/contraction/CMakeLists.txt similarity index 100% rename from composable_kernel/test/contraction/CMakeLists.txt rename to test/contraction/CMakeLists.txt diff --git a/composable_kernel/test/contraction/test_contraction.cpp b/test/contraction/test_contraction.cpp similarity index 100% rename from composable_kernel/test/contraction/test_contraction.cpp rename to test/contraction/test_contraction.cpp diff --git a/composable_kernel/test/contraction/test_contraction_interface.cpp b/test/contraction/test_contraction_interface.cpp similarity index 100% rename from composable_kernel/test/contraction/test_contraction_interface.cpp rename to test/contraction/test_contraction_interface.cpp diff --git a/composable_kernel/test/conv_tensor_rearrange/CMakeLists.txt b/test/conv_tensor_rearrange/CMakeLists.txt similarity index 100% rename from composable_kernel/test/conv_tensor_rearrange/CMakeLists.txt rename to test/conv_tensor_rearrange/CMakeLists.txt diff --git a/composable_kernel/test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp b/test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp similarity index 100% rename from composable_kernel/test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp rename to test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp diff --git a/composable_kernel/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp b/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp similarity index 100% rename from composable_kernel/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp rename to test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp diff --git a/composable_kernel/test/conv_util/CMakeLists.txt b/test/conv_util/CMakeLists.txt similarity index 100% rename from composable_kernel/test/conv_util/CMakeLists.txt rename to test/conv_util/CMakeLists.txt diff --git a/composable_kernel/test/conv_util/conv_util.cpp b/test/conv_util/conv_util.cpp similarity index 100% rename from composable_kernel/test/conv_util/conv_util.cpp rename to test/conv_util/conv_util.cpp diff --git a/composable_kernel/test/convnd_bwd_data/CMakeLists.txt b/test/convnd_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/test/convnd_bwd_data/CMakeLists.txt rename to test/convnd_bwd_data/CMakeLists.txt diff --git a/composable_kernel/test/convnd_bwd_data/convnd_bwd_data.cpp b/test/convnd_bwd_data/convnd_bwd_data.cpp similarity index 100% rename from composable_kernel/test/convnd_bwd_data/convnd_bwd_data.cpp rename to test/convnd_bwd_data/convnd_bwd_data.cpp diff --git a/composable_kernel/test/convnd_fwd/CMakeLists.txt b/test/convnd_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/test/convnd_fwd/CMakeLists.txt rename to test/convnd_fwd/CMakeLists.txt diff --git a/composable_kernel/test/convnd_fwd/convnd_fwd.cpp b/test/convnd_fwd/convnd_fwd.cpp similarity index 100% rename from composable_kernel/test/convnd_fwd/convnd_fwd.cpp rename to test/convnd_fwd/convnd_fwd.cpp diff --git a/composable_kernel/test/data_type/CMakeLists.txt b/test/data_type/CMakeLists.txt similarity index 100% rename from composable_kernel/test/data_type/CMakeLists.txt rename to test/data_type/CMakeLists.txt diff --git a/composable_kernel/test/data_type/test_bf8.cpp b/test/data_type/test_bf8.cpp similarity index 100% rename from composable_kernel/test/data_type/test_bf8.cpp rename to test/data_type/test_bf8.cpp diff --git a/composable_kernel/test/data_type/test_fp8.cpp b/test/data_type/test_fp8.cpp similarity index 100% rename from composable_kernel/test/data_type/test_fp8.cpp rename to test/data_type/test_fp8.cpp diff --git a/composable_kernel/test/data_type/test_int4.cpp b/test/data_type/test_int4.cpp similarity index 100% rename from composable_kernel/test/data_type/test_int4.cpp rename to test/data_type/test_int4.cpp diff --git a/composable_kernel/test/data_type/type_convert_const.cpp b/test/data_type/type_convert_const.cpp similarity index 100% rename from composable_kernel/test/data_type/type_convert_const.cpp rename to test/data_type/type_convert_const.cpp diff --git a/composable_kernel/test/elementwise_normalization/CMakeLists.txt b/test/elementwise_normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/test/elementwise_normalization/CMakeLists.txt rename to test/elementwise_normalization/CMakeLists.txt diff --git a/composable_kernel/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp b/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp similarity index 100% rename from composable_kernel/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp rename to test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp diff --git a/composable_kernel/test/gemm/CMakeLists.txt b/test/gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/gemm/CMakeLists.txt rename to test/gemm/CMakeLists.txt diff --git a/composable_kernel/test/gemm/gemm_bf16.cpp b/test/gemm/gemm_bf16.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_bf16.cpp rename to test/gemm/gemm_bf16.cpp diff --git a/composable_kernel/test/gemm/gemm_fp16.cpp b/test/gemm/gemm_fp16.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_fp16.cpp rename to test/gemm/gemm_fp16.cpp diff --git a/composable_kernel/test/gemm/gemm_fp32.cpp b/test/gemm/gemm_fp32.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_fp32.cpp rename to test/gemm/gemm_fp32.cpp diff --git a/composable_kernel/test/gemm/gemm_fp64.cpp b/test/gemm/gemm_fp64.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_fp64.cpp rename to test/gemm/gemm_fp64.cpp diff --git a/composable_kernel/test/gemm/gemm_int8.cpp b/test/gemm/gemm_int8.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_int8.cpp rename to test/gemm/gemm_int8.cpp diff --git a/composable_kernel/test/gemm/gemm_standalone_xdl_fp16.cpp b/test/gemm/gemm_standalone_xdl_fp16.cpp similarity index 100% rename from composable_kernel/test/gemm/gemm_standalone_xdl_fp16.cpp rename to test/gemm/gemm_standalone_xdl_fp16.cpp diff --git a/composable_kernel/test/gemm/gemm_util.hpp b/test/gemm/gemm_util.hpp similarity index 100% rename from composable_kernel/test/gemm/gemm_util.hpp rename to test/gemm/gemm_util.hpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_nn_instance.cpp b/test/gemm/instance/gemm_f16_nn_instance.cpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_nn_instance.cpp rename to test/gemm/instance/gemm_f16_nn_instance.cpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_nn_instance.hpp b/test/gemm/instance/gemm_f16_nn_instance.hpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_nn_instance.hpp rename to test/gemm/instance/gemm_f16_nn_instance.hpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_nt_instance.cpp b/test/gemm/instance/gemm_f16_nt_instance.cpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_nt_instance.cpp rename to test/gemm/instance/gemm_f16_nt_instance.cpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_nt_instance.hpp b/test/gemm/instance/gemm_f16_nt_instance.hpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_nt_instance.hpp rename to test/gemm/instance/gemm_f16_nt_instance.hpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_tn_instance.cpp b/test/gemm/instance/gemm_f16_tn_instance.cpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_tn_instance.cpp rename to test/gemm/instance/gemm_f16_tn_instance.cpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_tn_instance.hpp b/test/gemm/instance/gemm_f16_tn_instance.hpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_tn_instance.hpp rename to test/gemm/instance/gemm_f16_tn_instance.hpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_tt_instance.cpp b/test/gemm/instance/gemm_f16_tt_instance.cpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_tt_instance.cpp rename to test/gemm/instance/gemm_f16_tt_instance.cpp diff --git a/composable_kernel/test/gemm/instance/gemm_f16_tt_instance.hpp b/test/gemm/instance/gemm_f16_tt_instance.hpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_f16_tt_instance.hpp rename to test/gemm/instance/gemm_f16_tt_instance.hpp diff --git a/composable_kernel/test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp b/test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp rename to test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp diff --git a/composable_kernel/test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp b/test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp similarity index 100% rename from composable_kernel/test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp rename to test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp diff --git a/composable_kernel/test/gemm/run_gemm_test.inc b/test/gemm/run_gemm_test.inc similarity index 100% rename from composable_kernel/test/gemm/run_gemm_test.inc rename to test/gemm/run_gemm_test.inc diff --git a/composable_kernel/test/gemm_layernorm/CMakeLists.txt b/test/gemm_layernorm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/gemm_layernorm/CMakeLists.txt rename to test/gemm_layernorm/CMakeLists.txt diff --git a/composable_kernel/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp b/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp similarity index 100% rename from composable_kernel/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp rename to test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp diff --git a/composable_kernel/test/gemm_reduce/CMakeLists.txt b/test/gemm_reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/test/gemm_reduce/CMakeLists.txt rename to test/gemm_reduce/CMakeLists.txt diff --git a/composable_kernel/test/gemm_reduce/gemm_reduce_fp16.cpp b/test/gemm_reduce/gemm_reduce_fp16.cpp similarity index 100% rename from composable_kernel/test/gemm_reduce/gemm_reduce_fp16.cpp rename to test/gemm_reduce/gemm_reduce_fp16.cpp diff --git a/composable_kernel/test/gemm_split_k/CMakeLists.txt b/test/gemm_split_k/CMakeLists.txt similarity index 100% rename from composable_kernel/test/gemm_split_k/CMakeLists.txt rename to test/gemm_split_k/CMakeLists.txt diff --git a/composable_kernel/test/gemm_split_k/test_gemm_splitk.cpp b/test/gemm_split_k/test_gemm_splitk.cpp similarity index 100% rename from composable_kernel/test/gemm_split_k/test_gemm_splitk.cpp rename to test/gemm_split_k/test_gemm_splitk.cpp diff --git a/composable_kernel/test/gemm_split_k/test_gemm_splitk_ut_cases.inc b/test/gemm_split_k/test_gemm_splitk_ut_cases.inc similarity index 100% rename from composable_kernel/test/gemm_split_k/test_gemm_splitk_ut_cases.inc rename to test/gemm_split_k/test_gemm_splitk_ut_cases.inc diff --git a/composable_kernel/test/gemm_split_k/test_gemm_splitk_util.hpp b/test/gemm_split_k/test_gemm_splitk_util.hpp similarity index 100% rename from composable_kernel/test/gemm_split_k/test_gemm_splitk_util.hpp rename to test/gemm_split_k/test_gemm_splitk_util.hpp diff --git a/composable_kernel/test/grouped_convnd_bwd_data/CMakeLists.txt b/test/grouped_convnd_bwd_data/CMakeLists.txt similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_data/CMakeLists.txt rename to test/grouped_convnd_bwd_data/CMakeLists.txt diff --git a/composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp rename to test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp diff --git a/composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp rename to test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp diff --git a/composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp rename to test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp diff --git a/composable_kernel/test/grouped_convnd_bwd_weight/CMakeLists.txt b/test/grouped_convnd_bwd_weight/CMakeLists.txt similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_weight/CMakeLists.txt rename to test/grouped_convnd_bwd_weight/CMakeLists.txt diff --git a/composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp rename to test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp diff --git a/composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp rename to test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp diff --git a/composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp rename to test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp diff --git a/composable_kernel/test/grouped_convnd_fwd/CMakeLists.txt b/test/grouped_convnd_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/test/grouped_convnd_fwd/CMakeLists.txt rename to test/grouped_convnd_fwd/CMakeLists.txt diff --git a/composable_kernel/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp b/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp rename to test/grouped_convnd_fwd/grouped_convnd_fwd.cpp diff --git a/composable_kernel/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp similarity index 100% rename from composable_kernel/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp rename to test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp diff --git a/composable_kernel/test/grouped_gemm/CMakeLists.txt b/test/grouped_gemm/CMakeLists.txt similarity index 100% rename from composable_kernel/test/grouped_gemm/CMakeLists.txt rename to test/grouped_gemm/CMakeLists.txt diff --git a/composable_kernel/test/grouped_gemm/test_grouped_gemm_interface.cpp b/test/grouped_gemm/test_grouped_gemm_interface.cpp similarity index 100% rename from composable_kernel/test/grouped_gemm/test_grouped_gemm_interface.cpp rename to test/grouped_gemm/test_grouped_gemm_interface.cpp diff --git a/composable_kernel/test/grouped_gemm/test_grouped_gemm_splitk.cpp b/test/grouped_gemm/test_grouped_gemm_splitk.cpp similarity index 100% rename from composable_kernel/test/grouped_gemm/test_grouped_gemm_splitk.cpp rename to test/grouped_gemm/test_grouped_gemm_splitk.cpp diff --git a/composable_kernel/test/grouped_gemm/test_grouped_gemm_ut_cases.inc b/test/grouped_gemm/test_grouped_gemm_ut_cases.inc similarity index 100% rename from composable_kernel/test/grouped_gemm/test_grouped_gemm_ut_cases.inc rename to test/grouped_gemm/test_grouped_gemm_ut_cases.inc diff --git a/composable_kernel/test/grouped_gemm/test_grouped_gemm_util.hpp b/test/grouped_gemm/test_grouped_gemm_util.hpp similarity index 100% rename from composable_kernel/test/grouped_gemm/test_grouped_gemm_util.hpp rename to test/grouped_gemm/test_grouped_gemm_util.hpp diff --git a/composable_kernel/test/image_to_column/CMakeLists.txt b/test/image_to_column/CMakeLists.txt similarity index 100% rename from composable_kernel/test/image_to_column/CMakeLists.txt rename to test/image_to_column/CMakeLists.txt diff --git a/composable_kernel/test/image_to_column/test_image_to_column.cpp b/test/image_to_column/test_image_to_column.cpp similarity index 100% rename from composable_kernel/test/image_to_column/test_image_to_column.cpp rename to test/image_to_column/test_image_to_column.cpp diff --git a/composable_kernel/test/image_to_column/test_image_to_column_interface.cpp b/test/image_to_column/test_image_to_column_interface.cpp similarity index 100% rename from composable_kernel/test/image_to_column/test_image_to_column_interface.cpp rename to test/image_to_column/test_image_to_column_interface.cpp diff --git a/composable_kernel/test/magic_number_division/CMakeLists.txt b/test/magic_number_division/CMakeLists.txt similarity index 100% rename from composable_kernel/test/magic_number_division/CMakeLists.txt rename to test/magic_number_division/CMakeLists.txt diff --git a/composable_kernel/test/magic_number_division/magic_number_division.cpp b/test/magic_number_division/magic_number_division.cpp similarity index 100% rename from composable_kernel/test/magic_number_division/magic_number_division.cpp rename to test/magic_number_division/magic_number_division.cpp diff --git a/composable_kernel/test/normalization/CMakeLists.txt b/test/normalization/CMakeLists.txt similarity index 100% rename from composable_kernel/test/normalization/CMakeLists.txt rename to test/normalization/CMakeLists.txt diff --git a/composable_kernel/test/normalization/test_groupnorm_fp16.cpp b/test/normalization/test_groupnorm_fp16.cpp similarity index 100% rename from composable_kernel/test/normalization/test_groupnorm_fp16.cpp rename to test/normalization/test_groupnorm_fp16.cpp diff --git a/composable_kernel/test/normalization/test_groupnorm_fp32.cpp b/test/normalization/test_groupnorm_fp32.cpp similarity index 100% rename from composable_kernel/test/normalization/test_groupnorm_fp32.cpp rename to test/normalization/test_groupnorm_fp32.cpp diff --git a/composable_kernel/test/normalization/test_layernorm2d_fp16.cpp b/test/normalization/test_layernorm2d_fp16.cpp similarity index 100% rename from composable_kernel/test/normalization/test_layernorm2d_fp16.cpp rename to test/normalization/test_layernorm2d_fp16.cpp diff --git a/composable_kernel/test/normalization/test_layernorm2d_fp32.cpp b/test/normalization/test_layernorm2d_fp32.cpp similarity index 100% rename from composable_kernel/test/normalization/test_layernorm2d_fp32.cpp rename to test/normalization/test_layernorm2d_fp32.cpp diff --git a/composable_kernel/test/pool/CMakeLists.txt b/test/pool/CMakeLists.txt similarity index 100% rename from composable_kernel/test/pool/CMakeLists.txt rename to test/pool/CMakeLists.txt diff --git a/composable_kernel/test/pool/test_avg_pool3d_bwd.cpp b/test/pool/test_avg_pool3d_bwd.cpp similarity index 100% rename from composable_kernel/test/pool/test_avg_pool3d_bwd.cpp rename to test/pool/test_avg_pool3d_bwd.cpp diff --git a/composable_kernel/test/pool/test_avg_pool3d_fwd.cpp b/test/pool/test_avg_pool3d_fwd.cpp similarity index 100% rename from composable_kernel/test/pool/test_avg_pool3d_fwd.cpp rename to test/pool/test_avg_pool3d_fwd.cpp diff --git a/composable_kernel/test/pool/test_max_pool3d_bwd.cpp b/test/pool/test_max_pool3d_bwd.cpp similarity index 100% rename from composable_kernel/test/pool/test_max_pool3d_bwd.cpp rename to test/pool/test_max_pool3d_bwd.cpp diff --git a/composable_kernel/test/pool/test_max_pool3d_fwd.cpp b/test/pool/test_max_pool3d_fwd.cpp similarity index 100% rename from composable_kernel/test/pool/test_max_pool3d_fwd.cpp rename to test/pool/test_max_pool3d_fwd.cpp diff --git a/composable_kernel/test/pool/test_pool_fwd_common.hpp b/test/pool/test_pool_fwd_common.hpp similarity index 100% rename from composable_kernel/test/pool/test_pool_fwd_common.hpp rename to test/pool/test_pool_fwd_common.hpp diff --git a/composable_kernel/test/reduce/CMakeLists.txt b/test/reduce/CMakeLists.txt similarity index 100% rename from composable_kernel/test/reduce/CMakeLists.txt rename to test/reduce/CMakeLists.txt diff --git a/composable_kernel/test/reduce/reduce_no_index.cpp b/test/reduce/reduce_no_index.cpp similarity index 100% rename from composable_kernel/test/reduce/reduce_no_index.cpp rename to test/reduce/reduce_no_index.cpp diff --git a/composable_kernel/test/reduce/reduce_with_index.cpp b/test/reduce/reduce_with_index.cpp similarity index 100% rename from composable_kernel/test/reduce/reduce_with_index.cpp rename to test/reduce/reduce_with_index.cpp diff --git a/composable_kernel/test/reference_conv_fwd/CMakeLists.txt b/test/reference_conv_fwd/CMakeLists.txt similarity index 100% rename from composable_kernel/test/reference_conv_fwd/CMakeLists.txt rename to test/reference_conv_fwd/CMakeLists.txt diff --git a/composable_kernel/test/reference_conv_fwd/reference_conv_fwd.cpp b/test/reference_conv_fwd/reference_conv_fwd.cpp similarity index 100% rename from composable_kernel/test/reference_conv_fwd/reference_conv_fwd.cpp rename to test/reference_conv_fwd/reference_conv_fwd.cpp diff --git a/composable_kernel/test/softmax/CMakeLists.txt b/test/softmax/CMakeLists.txt similarity index 100% rename from composable_kernel/test/softmax/CMakeLists.txt rename to test/softmax/CMakeLists.txt diff --git a/composable_kernel/test/softmax/test_softmax_interface.cpp b/test/softmax/test_softmax_interface.cpp similarity index 100% rename from composable_kernel/test/softmax/test_softmax_interface.cpp rename to test/softmax/test_softmax_interface.cpp diff --git a/composable_kernel/test/softmax/test_softmax_rank3.cpp b/test/softmax/test_softmax_rank3.cpp similarity index 100% rename from composable_kernel/test/softmax/test_softmax_rank3.cpp rename to test/softmax/test_softmax_rank3.cpp diff --git a/composable_kernel/test/softmax/test_softmax_rank4.cpp b/test/softmax/test_softmax_rank4.cpp similarity index 100% rename from composable_kernel/test/softmax/test_softmax_rank4.cpp rename to test/softmax/test_softmax_rank4.cpp diff --git a/composable_kernel/test/softmax/test_softmax_ut_cases.inc b/test/softmax/test_softmax_ut_cases.inc similarity index 100% rename from composable_kernel/test/softmax/test_softmax_ut_cases.inc rename to test/softmax/test_softmax_ut_cases.inc diff --git a/composable_kernel/test/softmax/test_softmax_util.hpp b/test/softmax/test_softmax_util.hpp similarity index 100% rename from composable_kernel/test/softmax/test_softmax_util.hpp rename to test/softmax/test_softmax_util.hpp diff --git a/composable_kernel/test/space_filling_curve/CMakeLists.txt b/test/space_filling_curve/CMakeLists.txt similarity index 100% rename from composable_kernel/test/space_filling_curve/CMakeLists.txt rename to test/space_filling_curve/CMakeLists.txt diff --git a/composable_kernel/test/space_filling_curve/space_filling_curve.cpp b/test/space_filling_curve/space_filling_curve.cpp similarity index 100% rename from composable_kernel/test/space_filling_curve/space_filling_curve.cpp rename to test/space_filling_curve/space_filling_curve.cpp diff --git a/composable_kernel/test/wmma_op/CMakeLists.txt b/test/wmma_op/CMakeLists.txt similarity index 100% rename from composable_kernel/test/wmma_op/CMakeLists.txt rename to test/wmma_op/CMakeLists.txt diff --git a/composable_kernel/test/wmma_op/wmma_op.cpp b/test/wmma_op/wmma_op.cpp similarity index 100% rename from composable_kernel/test/wmma_op/wmma_op.cpp rename to test/wmma_op/wmma_op.cpp diff --git a/composable_kernel/test/wmma_op/wmma_op_util.hpp b/test/wmma_op/wmma_op_util.hpp similarity index 100% rename from composable_kernel/test/wmma_op/wmma_op_util.hpp rename to test/wmma_op/wmma_op_util.hpp