diff --git a/.cache/clangd/index/add_device_operation_instance.hpp.C89DB8EBC3EFFE7D.idx b/.cache/clangd/index/add_device_operation_instance.hpp.C89DB8EBC3EFFE7D.idx new file mode 100755 index 0000000000000000000000000000000000000000..50693bbf1c7652bd6a4b695f50269131572d869f Binary files /dev/null and b/.cache/clangd/index/add_device_operation_instance.hpp.C89DB8EBC3EFFE7D.idx differ diff --git a/.cache/clangd/index/algorithm.hpp.3E7EDFC4AA4FB32B.idx b/.cache/clangd/index/algorithm.hpp.3E7EDFC4AA4FB32B.idx new file mode 100755 index 0000000000000000000000000000000000000000..94088da4eddbf26ced021a3dc65670ed81446ca9 Binary files /dev/null and b/.cache/clangd/index/algorithm.hpp.3E7EDFC4AA4FB32B.idx differ diff --git a/.cache/clangd/index/amd_address_space.hpp.FB92D54F0A7BAF6E.idx b/.cache/clangd/index/amd_address_space.hpp.FB92D54F0A7BAF6E.idx new file mode 100755 index 0000000000000000000000000000000000000000..608fe43dd2f44ba7cb12200346b5cf3fdfb3ffed Binary files /dev/null and b/.cache/clangd/index/amd_address_space.hpp.FB92D54F0A7BAF6E.idx differ diff --git a/.cache/clangd/index/amd_buffer_addressing.hpp.7FB2D5AE48874EC4.idx b/.cache/clangd/index/amd_buffer_addressing.hpp.7FB2D5AE48874EC4.idx new file mode 100755 index 0000000000000000000000000000000000000000..c614279cadec8251fed1671cc7d1798823b20abd Binary files /dev/null and b/.cache/clangd/index/amd_buffer_addressing.hpp.7FB2D5AE48874EC4.idx differ diff --git a/.cache/clangd/index/amd_gemm_dpp.hpp.55564336A8EE078E.idx b/.cache/clangd/index/amd_gemm_dpp.hpp.55564336A8EE078E.idx new file mode 100755 index 0000000000000000000000000000000000000000..49910bac90da4d573f8df2a1697494a007b4100d Binary files /dev/null and b/.cache/clangd/index/amd_gemm_dpp.hpp.55564336A8EE078E.idx differ diff --git a/.cache/clangd/index/amd_inline_asm.hpp.7777A491B073ADB0.idx b/.cache/clangd/index/amd_inline_asm.hpp.7777A491B073ADB0.idx new file mode 100755 index 0000000000000000000000000000000000000000..0515b5691dfc453c2f3870d1aaa8fe3d3c46bbf0 Binary files /dev/null and b/.cache/clangd/index/amd_inline_asm.hpp.7777A491B073ADB0.idx differ diff --git a/.cache/clangd/index/amd_lds.hpp.3739CBB0B218918C.idx b/.cache/clangd/index/amd_lds.hpp.3739CBB0B218918C.idx new file mode 100755 index 0000000000000000000000000000000000000000..02f003d38cefe7070e533444b389ea89a37d235f Binary files /dev/null and b/.cache/clangd/index/amd_lds.hpp.3739CBB0B218918C.idx differ diff --git a/.cache/clangd/index/amd_wave_read_first_lane.hpp.F8A301BEB898DCEC.idx b/.cache/clangd/index/amd_wave_read_first_lane.hpp.F8A301BEB898DCEC.idx new file mode 100755 index 0000000000000000000000000000000000000000..4337c456aa2a774a8ba5fbf4181be163dac067f1 Binary files /dev/null and b/.cache/clangd/index/amd_wave_read_first_lane.hpp.F8A301BEB898DCEC.idx differ diff --git a/.cache/clangd/index/amd_wmma.hpp.143B2B78EF519046.idx b/.cache/clangd/index/amd_wmma.hpp.143B2B78EF519046.idx new file mode 100755 index 0000000000000000000000000000000000000000..145a9262f3b66c0bfccb8ed74264080319232490 Binary files /dev/null and b/.cache/clangd/index/amd_wmma.hpp.143B2B78EF519046.idx differ diff --git a/.cache/clangd/index/amd_xdlops.hpp.5A2FDE085CB4BC8A.idx b/.cache/clangd/index/amd_xdlops.hpp.5A2FDE085CB4BC8A.idx new file mode 100755 index 0000000000000000000000000000000000000000..44ede942d27bafa742f9f957a2aab379492d5f46 Binary files /dev/null and b/.cache/clangd/index/amd_xdlops.hpp.5A2FDE085CB4BC8A.idx differ diff --git a/.cache/clangd/index/array.hpp.EEBA77115A6888DA.idx b/.cache/clangd/index/array.hpp.EEBA77115A6888DA.idx new file mode 100755 index 0000000000000000000000000000000000000000..542e77153c40a38d2c47e4de696587057dd31cc2 Binary files /dev/null and b/.cache/clangd/index/array.hpp.EEBA77115A6888DA.idx differ diff --git a/.cache/clangd/index/avg_pool3d_bwd.hpp.40079D84C914EB9A.idx b/.cache/clangd/index/avg_pool3d_bwd.hpp.40079D84C914EB9A.idx new file mode 100755 index 0000000000000000000000000000000000000000..45c5f80942202e01938b0a73b3df5985bdf7796c Binary files /dev/null and b/.cache/clangd/index/avg_pool3d_bwd.hpp.40079D84C914EB9A.idx differ diff --git a/.cache/clangd/index/avg_pool3d_bwd_ndhwc_instance_common.hpp.EDEFB32170FF2FE7.idx b/.cache/clangd/index/avg_pool3d_bwd_ndhwc_instance_common.hpp.EDEFB32170FF2FE7.idx new file mode 100755 index 0000000000000000000000000000000000000000..5640c2495f5ed2bfe7912ac911fa15fb6596a732 Binary files /dev/null and b/.cache/clangd/index/avg_pool3d_bwd_ndhwc_instance_common.hpp.EDEFB32170FF2FE7.idx differ diff --git a/.cache/clangd/index/avgpool3d_bwd_bf16.cpp.CC3B9B35F218D95E.idx b/.cache/clangd/index/avgpool3d_bwd_bf16.cpp.CC3B9B35F218D95E.idx new file mode 100755 index 0000000000000000000000000000000000000000..11d356e4bb9ebc7ddd119bd433675c1b4145a128 Binary files /dev/null and b/.cache/clangd/index/avgpool3d_bwd_bf16.cpp.CC3B9B35F218D95E.idx differ diff --git a/.cache/clangd/index/avgpool3d_bwd_common.hpp.C26898608FBBAD5A.idx b/.cache/clangd/index/avgpool3d_bwd_common.hpp.C26898608FBBAD5A.idx new file mode 100755 index 0000000000000000000000000000000000000000..11302a846b4f26243cbeca462a7b53f3838a4de8 Binary files /dev/null and b/.cache/clangd/index/avgpool3d_bwd_common.hpp.C26898608FBBAD5A.idx differ diff --git a/.cache/clangd/index/avgpool3d_bwd_fp16.cpp.11C3DA7A01E70C82.idx b/.cache/clangd/index/avgpool3d_bwd_fp16.cpp.11C3DA7A01E70C82.idx new file mode 100755 index 0000000000000000000000000000000000000000..dc381a607236b05c0a3889377f367169a3e7ea7c Binary files /dev/null and b/.cache/clangd/index/avgpool3d_bwd_fp16.cpp.11C3DA7A01E70C82.idx differ diff --git a/.cache/clangd/index/avgpool3d_bwd_fp32.cpp.4BE51B531517C414.idx b/.cache/clangd/index/avgpool3d_bwd_fp32.cpp.4BE51B531517C414.idx new file mode 100755 index 0000000000000000000000000000000000000000..07a426bfd972935c0809b6d60401578053a93216 Binary files /dev/null and b/.cache/clangd/index/avgpool3d_bwd_fp32.cpp.4BE51B531517C414.idx differ diff --git a/.cache/clangd/index/batched_gemm.hpp.E9C31BFE1C18D556.idx b/.cache/clangd/index/batched_gemm.hpp.E9C31BFE1C18D556.idx new file mode 100755 index 0000000000000000000000000000000000000000..19d0939a1d3331553c88b786c01bb059d052ad80 Binary files /dev/null and b/.cache/clangd/index/batched_gemm.hpp.E9C31BFE1C18D556.idx differ diff --git a/.cache/clangd/index/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp.25B35CBDBE38B07B.idx b/.cache/clangd/index/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp.25B35CBDBE38B07B.idx new file mode 100755 index 0000000000000000000000000000000000000000..76c32d800f936f1761ed15e3b84c5aee96ee81fc Binary files /dev/null and b/.cache/clangd/index/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp.25B35CBDBE38B07B.idx differ diff --git a/.cache/clangd/index/batched_gemm_add_relu_gemm_add.hpp.B9BDBBF21D219B22.idx b/.cache/clangd/index/batched_gemm_add_relu_gemm_add.hpp.B9BDBBF21D219B22.idx new file mode 100755 index 0000000000000000000000000000000000000000..d037c1ee38cef3cc3f1eb910c6ee9901ae234982 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_add_relu_gemm_add.hpp.B9BDBBF21D219B22.idx differ diff --git a/.cache/clangd/index/batched_gemm_bias_e_permute_xdl_fp16.cpp.6413F9674606CEBE.idx b/.cache/clangd/index/batched_gemm_bias_e_permute_xdl_fp16.cpp.6413F9674606CEBE.idx new file mode 100755 index 0000000000000000000000000000000000000000..394f471dd928b29eb09df39f9c41d62022d70314 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_bias_e_permute_xdl_fp16.cpp.6413F9674606CEBE.idx differ diff --git a/.cache/clangd/index/batched_gemm_bias_softmax_gemm_permute.hpp.3291370BDDEB76AF.idx b/.cache/clangd/index/batched_gemm_bias_softmax_gemm_permute.hpp.3291370BDDEB76AF.idx new file mode 100755 index 0000000000000000000000000000000000000000..902d5acac4b8e4d9944197f674a2f7aa140b33e9 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_bias_softmax_gemm_permute.hpp.3291370BDDEB76AF.idx differ diff --git a/.cache/clangd/index/batched_gemm_gemm.hpp.3359348295C6E9CF.idx b/.cache/clangd/index/batched_gemm_gemm.hpp.3359348295C6E9CF.idx new file mode 100755 index 0000000000000000000000000000000000000000..ef6c3e6aa5e27bc4e6ac1b520c289862ff7c6db5 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_gemm.hpp.3359348295C6E9CF.idx differ diff --git a/.cache/clangd/index/batched_gemm_gemm_xdl_bf16.cpp.38FE56A993CA3FF4.idx b/.cache/clangd/index/batched_gemm_gemm_xdl_bf16.cpp.38FE56A993CA3FF4.idx new file mode 100755 index 0000000000000000000000000000000000000000..69c184d54b789584d78be307d7e17ee2b4e0cbd1 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_gemm_xdl_bf16.cpp.38FE56A993CA3FF4.idx differ diff --git a/.cache/clangd/index/batched_gemm_gemm_xdl_fp16.cpp.F5472E9B2342F0A2.idx b/.cache/clangd/index/batched_gemm_gemm_xdl_fp16.cpp.F5472E9B2342F0A2.idx new file mode 100755 index 0000000000000000000000000000000000000000..f323c850f6bebe534ebecd6457c44ce4fe9fafbf Binary files /dev/null and b/.cache/clangd/index/batched_gemm_gemm_xdl_fp16.cpp.F5472E9B2342F0A2.idx differ diff --git a/.cache/clangd/index/batched_gemm_gemm_xdl_fp32.cpp.958D740430E8AB79.idx b/.cache/clangd/index/batched_gemm_gemm_xdl_fp32.cpp.958D740430E8AB79.idx new file mode 100755 index 0000000000000000000000000000000000000000..e5ab2c6614a0b60135d5c57c4540bbafbecebae3 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_gemm_xdl_fp32.cpp.958D740430E8AB79.idx differ diff --git a/.cache/clangd/index/batched_gemm_gemm_xdl_int8.cpp.EF48A52F4EBD9A62.idx b/.cache/clangd/index/batched_gemm_gemm_xdl_int8.cpp.EF48A52F4EBD9A62.idx new file mode 100755 index 0000000000000000000000000000000000000000..ec7787e93f504ac8f6ac40599580be52a750d036 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_gemm_xdl_int8.cpp.EF48A52F4EBD9A62.idx differ diff --git a/.cache/clangd/index/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.20C4D0C379DDEEBB.idx b/.cache/clangd/index/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.20C4D0C379DDEEBB.idx new file mode 100755 index 0000000000000000000000000000000000000000..0701564354c24c0a56e68ed0d6d155c43a112927 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.20C4D0C379DDEEBB.idx differ diff --git a/.cache/clangd/index/batched_gemm_multi_d.hpp.4E3016E76D827F13.idx b/.cache/clangd/index/batched_gemm_multi_d.hpp.4E3016E76D827F13.idx new file mode 100755 index 0000000000000000000000000000000000000000..e073f3b8fd794c64cf7adfdbd7bc808a770a93bc Binary files /dev/null and b/.cache/clangd/index/batched_gemm_multi_d.hpp.4E3016E76D827F13.idx differ diff --git a/.cache/clangd/index/batched_gemm_reduce_fp16.cpp.795EB59AA922209F.idx b/.cache/clangd/index/batched_gemm_reduce_fp16.cpp.795EB59AA922209F.idx new file mode 100755 index 0000000000000000000000000000000000000000..531addd0c345bebe6a709d6937296cf576f80747 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_reduce_fp16.cpp.795EB59AA922209F.idx differ diff --git a/.cache/clangd/index/batched_gemm_reduce_xdl_fp16.cpp.6EA78CC685DE72B1.idx b/.cache/clangd/index/batched_gemm_reduce_xdl_fp16.cpp.6EA78CC685DE72B1.idx new file mode 100755 index 0000000000000000000000000000000000000000..c8f75891626f2bc507dd1743cbc654f2ace07971 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_reduce_xdl_fp16.cpp.6EA78CC685DE72B1.idx differ diff --git a/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp.3B79E782257965A3.idx b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp.3B79E782257965A3.idx new file mode 100755 index 0000000000000000000000000000000000000000..00857c098745021f4a84a6af19caf5b7efe1c202 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp.3B79E782257965A3.idx differ diff --git a/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.71938F78DFDFE1BE.idx b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.71938F78DFDFE1BE.idx new file mode 100755 index 0000000000000000000000000000000000000000..77ff6c2630e7afbbefe14ff49fda93904efe8164 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.71938F78DFDFE1BE.idx differ diff --git a/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp.74FE791FA1A9F5A4.idx b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp.74FE791FA1A9F5A4.idx new file mode 100755 index 0000000000000000000000000000000000000000..7d77b5be82dd761f924a8cfd59ffb9a0729ee050 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp.74FE791FA1A9F5A4.idx differ diff --git a/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp.B77888D4EF3A20F0.idx b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp.B77888D4EF3A20F0.idx new file mode 100755 index 0000000000000000000000000000000000000000..640850ea63abf72b111ae8baf0d5be75a715bc5c Binary files /dev/null and b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp.B77888D4EF3A20F0.idx differ diff --git a/.cache/clangd/index/batched_gemm_softmax_gemm.hpp.2D7D43E1651E0616.idx b/.cache/clangd/index/batched_gemm_softmax_gemm.hpp.2D7D43E1651E0616.idx new file mode 100755 index 0000000000000000000000000000000000000000..4e073eccda14be895be2c278436e00dfe5387e3e Binary files /dev/null and b/.cache/clangd/index/batched_gemm_softmax_gemm.hpp.2D7D43E1651E0616.idx differ diff --git a/.cache/clangd/index/batched_gemm_softmax_gemm_permute.hpp.A9BA319B35F534EC.idx b/.cache/clangd/index/batched_gemm_softmax_gemm_permute.hpp.A9BA319B35F534EC.idx new file mode 100755 index 0000000000000000000000000000000000000000..0374afc0bbbc96e6ef601ab988ae253bf09544d4 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_softmax_gemm_permute.hpp.A9BA319B35F534EC.idx differ diff --git a/.cache/clangd/index/batched_gemm_xdl_bf16.cpp.ED68A85BB00385BD.idx b/.cache/clangd/index/batched_gemm_xdl_bf16.cpp.ED68A85BB00385BD.idx new file mode 100755 index 0000000000000000000000000000000000000000..9792e0985063adfe8e7feb01826a2a0f78a436ac Binary files /dev/null and b/.cache/clangd/index/batched_gemm_xdl_bf16.cpp.ED68A85BB00385BD.idx differ diff --git a/.cache/clangd/index/batched_gemm_xdl_fp16.cpp.A653F9336CDDD36D.idx b/.cache/clangd/index/batched_gemm_xdl_fp16.cpp.A653F9336CDDD36D.idx new file mode 100755 index 0000000000000000000000000000000000000000..7a7a61505e784cbd681862ba1e9b30523edaee1d Binary files /dev/null and b/.cache/clangd/index/batched_gemm_xdl_fp16.cpp.A653F9336CDDD36D.idx differ diff --git a/.cache/clangd/index/batched_gemm_xdl_fp32.cpp.B7D138F072E276A3.idx b/.cache/clangd/index/batched_gemm_xdl_fp32.cpp.B7D138F072E276A3.idx new file mode 100755 index 0000000000000000000000000000000000000000..85ac05a844349a423034c1d3b4cb3024ecd12d6d Binary files /dev/null and b/.cache/clangd/index/batched_gemm_xdl_fp32.cpp.B7D138F072E276A3.idx differ diff --git a/.cache/clangd/index/batched_gemm_xdl_int8.cpp.AEAA0DF5DDD6BDB1.idx b/.cache/clangd/index/batched_gemm_xdl_int8.cpp.AEAA0DF5DDD6BDB1.idx new file mode 100755 index 0000000000000000000000000000000000000000..c078cfe13a195de6a65ebd795d7698fc5ac0e7c2 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_xdl_int8.cpp.AEAA0DF5DDD6BDB1.idx differ diff --git a/.cache/clangd/index/batchnorm_backward.hpp.7322FEA177E6E308.idx b/.cache/clangd/index/batchnorm_backward.hpp.7322FEA177E6E308.idx new file mode 100755 index 0000000000000000000000000000000000000000..3169f340a348a4d8808214fc743bdf1013d72c1d Binary files /dev/null and b/.cache/clangd/index/batchnorm_backward.hpp.7322FEA177E6E308.idx differ diff --git a/.cache/clangd/index/batchnorm_backward_nhwc.cpp.7637AB6D79637171.idx b/.cache/clangd/index/batchnorm_backward_nhwc.cpp.7637AB6D79637171.idx new file mode 100755 index 0000000000000000000000000000000000000000..7b3e27787241414b2fc01c9d6e9e8ffcfd810c8d Binary files /dev/null and b/.cache/clangd/index/batchnorm_backward_nhwc.cpp.7637AB6D79637171.idx differ diff --git a/.cache/clangd/index/batchnorm_bwd_rank_4.cpp.15A9A6FE401A579D.idx b/.cache/clangd/index/batchnorm_bwd_rank_4.cpp.15A9A6FE401A579D.idx new file mode 100755 index 0000000000000000000000000000000000000000..e95dacdc0dd4574b6b07bb630a2ba6b4bf6b14f5 Binary files /dev/null and b/.cache/clangd/index/batchnorm_bwd_rank_4.cpp.15A9A6FE401A579D.idx differ diff --git a/.cache/clangd/index/batchnorm_common.hpp.7297BD2852C87A9D.idx b/.cache/clangd/index/batchnorm_common.hpp.7297BD2852C87A9D.idx new file mode 100755 index 0000000000000000000000000000000000000000..74aff7b3254da03f5d5a3c41f7b7ad5d3ed191ad Binary files /dev/null and b/.cache/clangd/index/batchnorm_common.hpp.7297BD2852C87A9D.idx differ diff --git a/.cache/clangd/index/batchnorm_forward.hpp.7C4BF3E98C6D2CE7.idx b/.cache/clangd/index/batchnorm_forward.hpp.7C4BF3E98C6D2CE7.idx new file mode 100755 index 0000000000000000000000000000000000000000..4ec87b29dc8ebe816b573c32e812eb6dcb73b75a Binary files /dev/null and b/.cache/clangd/index/batchnorm_forward.hpp.7C4BF3E98C6D2CE7.idx differ diff --git a/.cache/clangd/index/batchnorm_forward_inferring_nhwc.cpp.5CE047E1603B8333.idx b/.cache/clangd/index/batchnorm_forward_inferring_nhwc.cpp.5CE047E1603B8333.idx new file mode 100755 index 0000000000000000000000000000000000000000..5880cc9acd2cd31118dfba8ebffe1af0363e91f9 Binary files /dev/null and b/.cache/clangd/index/batchnorm_forward_inferring_nhwc.cpp.5CE047E1603B8333.idx differ diff --git a/.cache/clangd/index/batchnorm_forward_training_nhwc.cpp.6918CB89E3F27E24.idx b/.cache/clangd/index/batchnorm_forward_training_nhwc.cpp.6918CB89E3F27E24.idx new file mode 100755 index 0000000000000000000000000000000000000000..d1da7863f309bd0c965e476ae5b670c50b1e6562 Binary files /dev/null and b/.cache/clangd/index/batchnorm_forward_training_nhwc.cpp.6918CB89E3F27E24.idx differ diff --git a/.cache/clangd/index/batchnorm_forward_training_nhwc_obsolete.cpp.B9948F07339573BC.idx b/.cache/clangd/index/batchnorm_forward_training_nhwc_obsolete.cpp.B9948F07339573BC.idx new file mode 100755 index 0000000000000000000000000000000000000000..32a4b286c2ddcba2c441427afdbe0788e885d65f Binary files /dev/null and b/.cache/clangd/index/batchnorm_forward_training_nhwc_obsolete.cpp.B9948F07339573BC.idx differ diff --git a/.cache/clangd/index/batchnorm_fwd_rank_4.cpp.051838AC0BBFCDA1.idx b/.cache/clangd/index/batchnorm_fwd_rank_4.cpp.051838AC0BBFCDA1.idx new file mode 100755 index 0000000000000000000000000000000000000000..373223b49cd01455fe8c55b32c66a3dac4149fb5 Binary files /dev/null and b/.cache/clangd/index/batchnorm_fwd_rank_4.cpp.051838AC0BBFCDA1.idx differ diff --git a/.cache/clangd/index/batchnorm_infer.hpp.F7608824D3FE5589.idx b/.cache/clangd/index/batchnorm_infer.hpp.F7608824D3FE5589.idx new file mode 100755 index 0000000000000000000000000000000000000000..37d2d3492da6dcc11d1e2cb691f817b47bf530fe Binary files /dev/null and b/.cache/clangd/index/batchnorm_infer.hpp.F7608824D3FE5589.idx differ diff --git a/.cache/clangd/index/batchnorm_infer_impl.hpp.0B7228D5ED433292.idx b/.cache/clangd/index/batchnorm_infer_impl.hpp.0B7228D5ED433292.idx new file mode 100755 index 0000000000000000000000000000000000000000..f1e9e4e8bef742be1b767d0ea730a9d580132700 Binary files /dev/null and b/.cache/clangd/index/batchnorm_infer_impl.hpp.0B7228D5ED433292.idx differ diff --git a/.cache/clangd/index/batchnorm_infer_rank_4.cpp.16E993641F247D85.idx b/.cache/clangd/index/batchnorm_infer_rank_4.cpp.16E993641F247D85.idx new file mode 100755 index 0000000000000000000000000000000000000000..981c6c9695a9371f890c554b2b3e64e908bb917f Binary files /dev/null and b/.cache/clangd/index/batchnorm_infer_rank_4.cpp.16E993641F247D85.idx differ diff --git a/.cache/clangd/index/binary_element_wise_operation.hpp.D7064E30A6CF6778.idx b/.cache/clangd/index/binary_element_wise_operation.hpp.D7064E30A6CF6778.idx new file mode 100755 index 0000000000000000000000000000000000000000..ffabcaf6f933f2b12b750f4ed7a08268cd53635f Binary files /dev/null and b/.cache/clangd/index/binary_element_wise_operation.hpp.D7064E30A6CF6778.idx differ diff --git a/.cache/clangd/index/block_to_ctile_map.hpp.F32EBB9EC094E255.idx b/.cache/clangd/index/block_to_ctile_map.hpp.F32EBB9EC094E255.idx new file mode 100755 index 0000000000000000000000000000000000000000..cfee4b6bad95913549e9ce27bcb1e6be5fb43b77 Binary files /dev/null and b/.cache/clangd/index/block_to_ctile_map.hpp.F32EBB9EC094E255.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_dpp.hpp.5CB9342685AA4AD4.idx b/.cache/clangd/index/blockwise_gemm_dpp.hpp.5CB9342685AA4AD4.idx new file mode 100755 index 0000000000000000000000000000000000000000..9c1bc1a5d6e95f18003edb6520dc6ba73b5c77f7 Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_dpp.hpp.5CB9342685AA4AD4.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_pipeline_xdlops.hpp.C000528561553163.idx b/.cache/clangd/index/blockwise_gemm_pipeline_xdlops.hpp.C000528561553163.idx new file mode 100755 index 0000000000000000000000000000000000000000..10b29ea0ed436ae2e6fb3d266d37e569c6993da2 Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_pipeline_xdlops.hpp.C000528561553163.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_wmma.hpp.33F41B2FF8044BC6.idx b/.cache/clangd/index/blockwise_gemm_wmma.hpp.33F41B2FF8044BC6.idx new file mode 100755 index 0000000000000000000000000000000000000000..a980c11856836e9014b0828eeee43b3fe6a15713 Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_wmma.hpp.33F41B2FF8044BC6.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_xdl_traits.hpp.9E775B78211AACFC.idx b/.cache/clangd/index/blockwise_gemm_xdl_traits.hpp.9E775B78211AACFC.idx new file mode 100755 index 0000000000000000000000000000000000000000..eab3d4715848a236cdcf81bfe9c9d37e16f737f2 Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_xdl_traits.hpp.9E775B78211AACFC.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_xdlops.hpp.E5193AF5C1961F73.idx b/.cache/clangd/index/blockwise_gemm_xdlops.hpp.E5193AF5C1961F73.idx new file mode 100755 index 0000000000000000000000000000000000000000..ad152908d55b66aa5cf5550604844032d060ce3b Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_xdlops.hpp.E5193AF5C1961F73.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_xdlops_skip_b_lds.hpp.BC4BEC2371CE0CF7.idx b/.cache/clangd/index/blockwise_gemm_xdlops_skip_b_lds.hpp.BC4BEC2371CE0CF7.idx new file mode 100755 index 0000000000000000000000000000000000000000..7e053e3f298d8be4a892c11d7e3023ca206335e4 Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_xdlops_skip_b_lds.hpp.BC4BEC2371CE0CF7.idx differ diff --git a/.cache/clangd/index/blockwise_softmax.hpp.D0089700D1C9998B.idx b/.cache/clangd/index/blockwise_softmax.hpp.D0089700D1C9998B.idx new file mode 100755 index 0000000000000000000000000000000000000000..f4dfa8928920852b8741a6566836e70939f87fdd Binary files /dev/null and b/.cache/clangd/index/blockwise_softmax.hpp.D0089700D1C9998B.idx differ diff --git a/.cache/clangd/index/blockwise_welford.hpp.93790E23276AA4A9.idx b/.cache/clangd/index/blockwise_welford.hpp.93790E23276AA4A9.idx new file mode 100755 index 0000000000000000000000000000000000000000..f7a53b2246938b6167305bef4319b4f1986ba58b Binary files /dev/null and b/.cache/clangd/index/blockwise_welford.hpp.93790E23276AA4A9.idx differ diff --git a/.cache/clangd/index/broadcast_add_2d_amn_bn.cpp.7B44FEB5DE986168.idx b/.cache/clangd/index/broadcast_add_2d_amn_bn.cpp.7B44FEB5DE986168.idx new file mode 100755 index 0000000000000000000000000000000000000000..c07668af67e09d7c8090d681f503b763f176add4 Binary files /dev/null and b/.cache/clangd/index/broadcast_add_2d_amn_bn.cpp.7B44FEB5DE986168.idx differ diff --git a/.cache/clangd/index/broadcast_add_3d_am_bmnk.cpp.D83172D8CBF829F1.idx b/.cache/clangd/index/broadcast_add_3d_am_bmnk.cpp.D83172D8CBF829F1.idx new file mode 100755 index 0000000000000000000000000000000000000000..eb1b2b8fe8e8c1577adcff6f019e66ea2a160db9 Binary files /dev/null and b/.cache/clangd/index/broadcast_add_3d_am_bmnk.cpp.D83172D8CBF829F1.idx differ diff --git a/.cache/clangd/index/c_style_pointer_cast.hpp.6866D538A80ECC76.idx b/.cache/clangd/index/c_style_pointer_cast.hpp.6866D538A80ECC76.idx new file mode 100755 index 0000000000000000000000000000000000000000..cec437ee3ecf5ce1a18e0f869f82b50386273ca3 Binary files /dev/null and b/.cache/clangd/index/c_style_pointer_cast.hpp.6866D538A80ECC76.idx differ diff --git a/.cache/clangd/index/cgemm_xdl_bf16.cpp.D34D118D7BC516E4.idx b/.cache/clangd/index/cgemm_xdl_bf16.cpp.D34D118D7BC516E4.idx new file mode 100755 index 0000000000000000000000000000000000000000..aa65d663ae6f3a44a5e1507da1805bf4062c95ed Binary files /dev/null and b/.cache/clangd/index/cgemm_xdl_bf16.cpp.D34D118D7BC516E4.idx differ diff --git a/.cache/clangd/index/cgemm_xdl_common.hpp.FBF552CB8A2753F4.idx b/.cache/clangd/index/cgemm_xdl_common.hpp.FBF552CB8A2753F4.idx new file mode 100755 index 0000000000000000000000000000000000000000..7a1eaa350aa4714de7b30be95c3a1294b0d8a608 Binary files /dev/null and b/.cache/clangd/index/cgemm_xdl_common.hpp.FBF552CB8A2753F4.idx differ diff --git a/.cache/clangd/index/cgemm_xdl_fp16.cpp.93DC5CFE4CF25E39.idx b/.cache/clangd/index/cgemm_xdl_fp16.cpp.93DC5CFE4CF25E39.idx new file mode 100755 index 0000000000000000000000000000000000000000..8f14e6179f8a7122c479978f7fe32a75eda878a4 Binary files /dev/null and b/.cache/clangd/index/cgemm_xdl_fp16.cpp.93DC5CFE4CF25E39.idx differ diff --git a/.cache/clangd/index/cgemm_xdl_fp32.cpp.D4109241584CC5F6.idx b/.cache/clangd/index/cgemm_xdl_fp32.cpp.D4109241584CC5F6.idx new file mode 100755 index 0000000000000000000000000000000000000000..2f716bd3dbee1370d92231ddaa647a3af58776ba Binary files /dev/null and b/.cache/clangd/index/cgemm_xdl_fp32.cpp.D4109241584CC5F6.idx differ diff --git a/.cache/clangd/index/cgemm_xdl_int8.cpp.7D0F341E5936B471.idx b/.cache/clangd/index/cgemm_xdl_int8.cpp.7D0F341E5936B471.idx new file mode 100755 index 0000000000000000000000000000000000000000..87eb49378d1c077505be5ddea65494d16cac883f Binary files /dev/null and b/.cache/clangd/index/cgemm_xdl_int8.cpp.7D0F341E5936B471.idx differ diff --git a/.cache/clangd/index/check_err.hpp.8BCADBDE7B4A301B.idx b/.cache/clangd/index/check_err.hpp.8BCADBDE7B4A301B.idx new file mode 100755 index 0000000000000000000000000000000000000000..8eba5263d1990f206a69ee271daa4ca211de9e68 Binary files /dev/null and b/.cache/clangd/index/check_err.hpp.8BCADBDE7B4A301B.idx differ diff --git a/.cache/clangd/index/ck.hpp.5BA5ECB9E73F13E2.idx b/.cache/clangd/index/ck.hpp.5BA5ECB9E73F13E2.idx new file mode 100755 index 0000000000000000000000000000000000000000..2fa7b44ac04ab1410f344f5b3f71575442143e1d Binary files /dev/null and b/.cache/clangd/index/ck.hpp.5BA5ECB9E73F13E2.idx differ diff --git a/.cache/clangd/index/cluster_descriptor.hpp.B962ADBD27415A8A.idx b/.cache/clangd/index/cluster_descriptor.hpp.B962ADBD27415A8A.idx new file mode 100755 index 0000000000000000000000000000000000000000..e78adc52af9d1dac6c45faedb6cef36e108f7c64 Binary files /dev/null and b/.cache/clangd/index/cluster_descriptor.hpp.B962ADBD27415A8A.idx differ diff --git a/.cache/clangd/index/column_to_image_f32.cpp.B78257C7C44FE524.idx b/.cache/clangd/index/column_to_image_f32.cpp.B78257C7C44FE524.idx new file mode 100755 index 0000000000000000000000000000000000000000..aa0f8ec93224f22f4cafbe04c4cee6c8434dabf4 Binary files /dev/null and b/.cache/clangd/index/column_to_image_f32.cpp.B78257C7C44FE524.idx differ diff --git a/.cache/clangd/index/common.hpp.030C990214BE859F.idx b/.cache/clangd/index/common.hpp.030C990214BE859F.idx new file mode 100755 index 0000000000000000000000000000000000000000..d6e43323fee7cd70d84e74c82306d1c4f74a2ab6 Binary files /dev/null and b/.cache/clangd/index/common.hpp.030C990214BE859F.idx differ diff --git a/.cache/clangd/index/common.hpp.0CF2F8C8A70630AF.idx b/.cache/clangd/index/common.hpp.0CF2F8C8A70630AF.idx new file mode 100755 index 0000000000000000000000000000000000000000..3125fa18e50ad19e7480ba82b54c3a150d5073a3 Binary files /dev/null and b/.cache/clangd/index/common.hpp.0CF2F8C8A70630AF.idx differ diff --git a/.cache/clangd/index/common.hpp.116380F0FD4C88BB.idx b/.cache/clangd/index/common.hpp.116380F0FD4C88BB.idx new file mode 100755 index 0000000000000000000000000000000000000000..ccefb22335e3d55c02b46b748c11bb6c781f9bcd Binary files /dev/null and b/.cache/clangd/index/common.hpp.116380F0FD4C88BB.idx differ diff --git a/.cache/clangd/index/common.hpp.30E7C524D1E7DCC8.idx b/.cache/clangd/index/common.hpp.30E7C524D1E7DCC8.idx new file mode 100755 index 0000000000000000000000000000000000000000..7b586e6cdbff4463124a4c74ea37afd065f383c3 Binary files /dev/null and b/.cache/clangd/index/common.hpp.30E7C524D1E7DCC8.idx differ diff --git a/.cache/clangd/index/common.hpp.3324DE766DB5A26E.idx b/.cache/clangd/index/common.hpp.3324DE766DB5A26E.idx new file mode 100755 index 0000000000000000000000000000000000000000..b10444bfb8d103dbc3bb728f2e8c2efc34e34584 Binary files /dev/null and b/.cache/clangd/index/common.hpp.3324DE766DB5A26E.idx differ diff --git a/.cache/clangd/index/common.hpp.725393A22E55A928.idx b/.cache/clangd/index/common.hpp.725393A22E55A928.idx new file mode 100755 index 0000000000000000000000000000000000000000..de3a34a8e9bfe381c3279530ba33253e2274d56f Binary files /dev/null and b/.cache/clangd/index/common.hpp.725393A22E55A928.idx differ diff --git a/.cache/clangd/index/common.hpp.7922E75661986F8F.idx b/.cache/clangd/index/common.hpp.7922E75661986F8F.idx new file mode 100755 index 0000000000000000000000000000000000000000..6f9762ba2354e7418aba806494cd9193af57a24e Binary files /dev/null and b/.cache/clangd/index/common.hpp.7922E75661986F8F.idx differ diff --git a/.cache/clangd/index/common.hpp.98AE17ADFF3CA289.idx b/.cache/clangd/index/common.hpp.98AE17ADFF3CA289.idx new file mode 100755 index 0000000000000000000000000000000000000000..9d274b8774d3d01495c14e57a951eced240434e9 Binary files /dev/null and b/.cache/clangd/index/common.hpp.98AE17ADFF3CA289.idx differ diff --git a/.cache/clangd/index/common.hpp.A2FB78C7F0E24A0E.idx b/.cache/clangd/index/common.hpp.A2FB78C7F0E24A0E.idx new file mode 100755 index 0000000000000000000000000000000000000000..99c952d679ad1acf08affec1b0f3d2f0769891dc Binary files /dev/null and b/.cache/clangd/index/common.hpp.A2FB78C7F0E24A0E.idx differ diff --git a/.cache/clangd/index/common.hpp.AA5CB32F503E8500.idx b/.cache/clangd/index/common.hpp.AA5CB32F503E8500.idx new file mode 100755 index 0000000000000000000000000000000000000000..42c7798c5b052c63c99329d7e6982d16fe0870a3 Binary files /dev/null and b/.cache/clangd/index/common.hpp.AA5CB32F503E8500.idx differ diff --git a/.cache/clangd/index/common.hpp.D8FBDC6F47AA6D80.idx b/.cache/clangd/index/common.hpp.D8FBDC6F47AA6D80.idx new file mode 100755 index 0000000000000000000000000000000000000000..ace4d33f1125488c55ec161e80d23e57eb2f9133 Binary files /dev/null and b/.cache/clangd/index/common.hpp.D8FBDC6F47AA6D80.idx differ diff --git a/.cache/clangd/index/common.hpp.E36EEFF638BE348F.idx b/.cache/clangd/index/common.hpp.E36EEFF638BE348F.idx new file mode 100755 index 0000000000000000000000000000000000000000..30311b4d79f8bb7dff5e0aca9e82cb388b8da589 Binary files /dev/null and b/.cache/clangd/index/common.hpp.E36EEFF638BE348F.idx differ diff --git a/.cache/clangd/index/common.hpp.F52D770AC14BC38D.idx b/.cache/clangd/index/common.hpp.F52D770AC14BC38D.idx new file mode 100755 index 0000000000000000000000000000000000000000..2df7dfc43c5703611a3f93af80cfa62319a7682d Binary files /dev/null and b/.cache/clangd/index/common.hpp.F52D770AC14BC38D.idx differ diff --git a/.cache/clangd/index/common.hpp.FF4590F87A18C9AE.idx b/.cache/clangd/index/common.hpp.FF4590F87A18C9AE.idx new file mode 100755 index 0000000000000000000000000000000000000000..60dfc17692991ba702f109434de26b4ec008c442 Binary files /dev/null and b/.cache/clangd/index/common.hpp.FF4590F87A18C9AE.idx differ diff --git a/.cache/clangd/index/common_header.hpp.BCB8C81E7AAAD43F.idx b/.cache/clangd/index/common_header.hpp.BCB8C81E7AAAD43F.idx new file mode 100755 index 0000000000000000000000000000000000000000..08aa5b65fc57e3fed4370c7d62298bb9e5778631 Binary files /dev/null and b/.cache/clangd/index/common_header.hpp.BCB8C81E7AAAD43F.idx differ diff --git a/.cache/clangd/index/common_instances.hpp.C20932BA881E0448.idx b/.cache/clangd/index/common_instances.hpp.C20932BA881E0448.idx new file mode 100755 index 0000000000000000000000000000000000000000..90c5ba47e8b95ce14a3938495846428f1a1cb3eb Binary files /dev/null and b/.cache/clangd/index/common_instances.hpp.C20932BA881E0448.idx differ diff --git a/.cache/clangd/index/container_element_picker.hpp.D0295372D9F2DFD0.idx b/.cache/clangd/index/container_element_picker.hpp.D0295372D9F2DFD0.idx new file mode 100755 index 0000000000000000000000000000000000000000..26f22696207015f7f54b4e10e5a11ed53d17cf2f Binary files /dev/null and b/.cache/clangd/index/container_element_picker.hpp.D0295372D9F2DFD0.idx differ diff --git a/.cache/clangd/index/container_helper.hpp.57BCB53456250D8A.idx b/.cache/clangd/index/container_helper.hpp.57BCB53456250D8A.idx new file mode 100755 index 0000000000000000000000000000000000000000..3b36d59535731580858f001a6be62269b81ddcb3 Binary files /dev/null and b/.cache/clangd/index/container_helper.hpp.57BCB53456250D8A.idx differ diff --git a/.cache/clangd/index/contraction_bilinear.hpp.3FE8920B21D2C083.idx b/.cache/clangd/index/contraction_bilinear.hpp.3FE8920B21D2C083.idx new file mode 100755 index 0000000000000000000000000000000000000000..8ad808c80040e804fc5fbfdeb4b14a29e17ef2d7 Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear.hpp.3FE8920B21D2C083.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_bf16_compute_fp32.cpp.34FCAF706B5AEB99.idx b/.cache/clangd/index/contraction_bilinear_xdl_bf16_compute_fp32.cpp.34FCAF706B5AEB99.idx new file mode 100755 index 0000000000000000000000000000000000000000..9b0d0e5f371266fbdb5da77b0b49cd4b16521c1c Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_bf16_compute_fp32.cpp.34FCAF706B5AEB99.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp16_compute_fp32.cpp.038CE2A80837D541.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp16_compute_fp32.cpp.038CE2A80837D541.idx new file mode 100755 index 0000000000000000000000000000000000000000..e5dc6c876f8f7948904cf422dbbddcd57895a4c5 Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp16_compute_fp32.cpp.038CE2A80837D541.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp32.cpp.37E0EC20BE4060B3.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp32.cpp.37E0EC20BE4060B3.idx new file mode 100755 index 0000000000000000000000000000000000000000..e7d422e801b86f94a40f1f12d9f3e50ef43b32e7 Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp32.cpp.37E0EC20BE4060B3.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_bf16.cpp.4B6BEF9EB4DD9C01.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_bf16.cpp.4B6BEF9EB4DD9C01.idx new file mode 100755 index 0000000000000000000000000000000000000000..cc3d4dd80d4a9c3f4e093c50644aed961100d888 Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_bf16.cpp.4B6BEF9EB4DD9C01.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_fp16.cpp.F92204694821E2EC.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_fp16.cpp.F92204694821E2EC.idx new file mode 100755 index 0000000000000000000000000000000000000000..9f074231c657a90fffc04ffaf639848291fbce3e Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_fp16.cpp.F92204694821E2EC.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp64.cpp.D740E9866ECDD95C.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp64.cpp.D740E9866ECDD95C.idx new file mode 100755 index 0000000000000000000000000000000000000000..1d5b75057c7b30e4f14a46a502a1ef18172b339d Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp64.cpp.D740E9866ECDD95C.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp64_compute_fp32.cpp.29832DEA4C5E2D0A.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp64_compute_fp32.cpp.29832DEA4C5E2D0A.idx new file mode 100755 index 0000000000000000000000000000000000000000..251b6218bed576b74207e66458dab5ed4a57d3b7 Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp64_compute_fp32.cpp.29832DEA4C5E2D0A.idx differ diff --git a/.cache/clangd/index/contraction_multi_ABD_xdl_fp16.cpp.95E96326F13F98F9.idx b/.cache/clangd/index/contraction_multi_ABD_xdl_fp16.cpp.95E96326F13F98F9.idx new file mode 100755 index 0000000000000000000000000000000000000000..f3d542513cada78f7c26179cd00e24ba7f4cf784 Binary files /dev/null and b/.cache/clangd/index/contraction_multi_ABD_xdl_fp16.cpp.95E96326F13F98F9.idx differ diff --git a/.cache/clangd/index/contraction_scale.hpp.9A93BE54CAD13142.idx b/.cache/clangd/index/contraction_scale.hpp.9A93BE54CAD13142.idx new file mode 100755 index 0000000000000000000000000000000000000000..261b1f924460c135aece7b8736b44d3ab653df02 Binary files /dev/null and b/.cache/clangd/index/contraction_scale.hpp.9A93BE54CAD13142.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_bf16_compute_fp32.cpp.2E396E91009DF0FA.idx b/.cache/clangd/index/contraction_scale_xdl_bf16_compute_fp32.cpp.2E396E91009DF0FA.idx new file mode 100755 index 0000000000000000000000000000000000000000..316fef4806799cfefdc62663b93d1ae3aee8e811 Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_bf16_compute_fp32.cpp.2E396E91009DF0FA.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp16_compute_fp32.cpp.A7A07273739AC223.idx b/.cache/clangd/index/contraction_scale_xdl_fp16_compute_fp32.cpp.A7A07273739AC223.idx new file mode 100755 index 0000000000000000000000000000000000000000..8de9268ae7d65b7bcf896919b590c6333d97ccba Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp16_compute_fp32.cpp.A7A07273739AC223.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp32.cpp.1A2C9B556CFF860B.idx b/.cache/clangd/index/contraction_scale_xdl_fp32.cpp.1A2C9B556CFF860B.idx new file mode 100755 index 0000000000000000000000000000000000000000..694b3a5cf3c8f26ae5d265ceb19e3fbc25a4f057 Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp32.cpp.1A2C9B556CFF860B.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp32_compute_bf16.cpp.301F203197216C44.idx b/.cache/clangd/index/contraction_scale_xdl_fp32_compute_bf16.cpp.301F203197216C44.idx new file mode 100755 index 0000000000000000000000000000000000000000..29c90e89e36cf388827f22d5177b9ff0f36125ae Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp32_compute_bf16.cpp.301F203197216C44.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp32_compute_fp16.cpp.F7341E88A32312DC.idx b/.cache/clangd/index/contraction_scale_xdl_fp32_compute_fp16.cpp.F7341E88A32312DC.idx new file mode 100755 index 0000000000000000000000000000000000000000..80d5bb01172df881771dcd77bc8a3d802724d6a0 Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp32_compute_fp16.cpp.F7341E88A32312DC.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp64.cpp.7417F385FB954651.idx b/.cache/clangd/index/contraction_scale_xdl_fp64.cpp.7417F385FB954651.idx new file mode 100755 index 0000000000000000000000000000000000000000..0201b38d42d9b4fb10ada3866cf0a2ee3f3752ef Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp64.cpp.7417F385FB954651.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp64_compute_fp32.cpp.02A2D58B76014A4B.idx b/.cache/clangd/index/contraction_scale_xdl_fp64_compute_fp32.cpp.02A2D58B76014A4B.idx new file mode 100755 index 0000000000000000000000000000000000000000..5542395726607e7b1e986b33614cfc65f99403d8 Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp64_compute_fp32.cpp.02A2D58B76014A4B.idx differ diff --git a/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp.D342A33945D6732C.idx b/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp.D342A33945D6732C.idx new file mode 100755 index 0000000000000000000000000000000000000000..3ef24c5eb874664d3a80bbc64239ed98b85af519 Binary files /dev/null and b/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp.D342A33945D6732C.idx differ diff --git a/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp.E5EA635D0BE03F7D.idx b/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp.E5EA635D0BE03F7D.idx new file mode 100755 index 0000000000000000000000000000000000000000..d3bbf74155243e18db0965768f4dba63d537fdc7 Binary files /dev/null and b/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp.E5EA635D0BE03F7D.idx differ diff --git a/.cache/clangd/index/conv2d_fwd_xdl_perchannel_quantization_int8.cpp.DA805D9EE5DF962F.idx b/.cache/clangd/index/conv2d_fwd_xdl_perchannel_quantization_int8.cpp.DA805D9EE5DF962F.idx new file mode 100755 index 0000000000000000000000000000000000000000..d9cfe305dd51ff28302055445d0eb853f57aff51 Binary files /dev/null and b/.cache/clangd/index/conv2d_fwd_xdl_perchannel_quantization_int8.cpp.DA805D9EE5DF962F.idx differ diff --git a/.cache/clangd/index/conv2d_fwd_xdl_perlayer_quantization_int8.cpp.7D138D1D290C5C53.idx b/.cache/clangd/index/conv2d_fwd_xdl_perlayer_quantization_int8.cpp.7D138D1D290C5C53.idx new file mode 100755 index 0000000000000000000000000000000000000000..002f9434925a98b9900fdd4beb6b901ac771a468 Binary files /dev/null and b/.cache/clangd/index/conv2d_fwd_xdl_perlayer_quantization_int8.cpp.7D138D1D290C5C53.idx differ diff --git a/.cache/clangd/index/conv2d_quantization_common.hpp.2C2B760A7CE8525E.idx b/.cache/clangd/index/conv2d_quantization_common.hpp.2C2B760A7CE8525E.idx new file mode 100755 index 0000000000000000000000000000000000000000..611e56f23c185cf5b804578cbc210ee144a87fd4 Binary files /dev/null and b/.cache/clangd/index/conv2d_quantization_common.hpp.2C2B760A7CE8525E.idx differ diff --git a/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_bf16.cpp.8FB5CAA0FDE2EFE8.idx b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_bf16.cpp.8FB5CAA0FDE2EFE8.idx new file mode 100755 index 0000000000000000000000000000000000000000..cea088cc6dcecc1ff8228f94a1b33fc5d39c9d1a Binary files /dev/null and b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_bf16.cpp.8FB5CAA0FDE2EFE8.idx differ diff --git a/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp16.cpp.A862E22D755445CE.idx b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp16.cpp.A862E22D755445CE.idx new file mode 100755 index 0000000000000000000000000000000000000000..234c2aa9309c32376462e4c05c113e9f79aed6c0 Binary files /dev/null and b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp16.cpp.A862E22D755445CE.idx differ diff --git a/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp32.cpp.E8494EFCFB4F3B30.idx b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp32.cpp.E8494EFCFB4F3B30.idx new file mode 100755 index 0000000000000000000000000000000000000000..b8061eca8d81e2b274eadfd0280729be046256f7 Binary files /dev/null and b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp32.cpp.E8494EFCFB4F3B30.idx differ diff --git a/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_int8.cpp.97C188D55BA3C9C1.idx b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_int8.cpp.97C188D55BA3C9C1.idx new file mode 100755 index 0000000000000000000000000000000000000000..5fe42018566b8b8e939061a2b39421b22b7168f8 Binary files /dev/null and b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_int8.cpp.97C188D55BA3C9C1.idx differ diff --git a/.cache/clangd/index/conv_tensor_rearrange.hpp.1BD4F049133037BC.idx b/.cache/clangd/index/conv_tensor_rearrange.hpp.1BD4F049133037BC.idx new file mode 100755 index 0000000000000000000000000000000000000000..21c4fd727b825a84dce57232129632b5121d56f8 Binary files /dev/null and b/.cache/clangd/index/conv_tensor_rearrange.hpp.1BD4F049133037BC.idx differ diff --git a/.cache/clangd/index/conv_tensor_rearrange_op.hpp.1DE16BC6AE42EE5B.idx b/.cache/clangd/index/conv_tensor_rearrange_op.hpp.1DE16BC6AE42EE5B.idx new file mode 100755 index 0000000000000000000000000000000000000000..769092f8be4a961b52cb19a091baee4a4872dd7a Binary files /dev/null and b/.cache/clangd/index/conv_tensor_rearrange_op.hpp.1DE16BC6AE42EE5B.idx differ diff --git a/.cache/clangd/index/conv_util.cpp.2DB2D9F346007F63.idx b/.cache/clangd/index/conv_util.cpp.2DB2D9F346007F63.idx new file mode 100755 index 0000000000000000000000000000000000000000..aac25c6623f36ecbc3adcc848ca592022ac0f4cd Binary files /dev/null and b/.cache/clangd/index/conv_util.cpp.2DB2D9F346007F63.idx differ diff --git a/.cache/clangd/index/convnd_bwd_data.cpp.7C4E178AEE50DCF2.idx b/.cache/clangd/index/convnd_bwd_data.cpp.7C4E178AEE50DCF2.idx new file mode 100755 index 0000000000000000000000000000000000000000..f8664073ed0b589c7effba1fe7e7f3aaae9ca6aa Binary files /dev/null and b/.cache/clangd/index/convnd_bwd_data.cpp.7C4E178AEE50DCF2.idx differ diff --git a/.cache/clangd/index/convnd_bwd_data_common.hpp.FAAF76603FF7D70B.idx b/.cache/clangd/index/convnd_bwd_data_common.hpp.FAAF76603FF7D70B.idx new file mode 100755 index 0000000000000000000000000000000000000000..d712d61aa89e99b5dc070e81da832d0cfafe77df Binary files /dev/null and b/.cache/clangd/index/convnd_bwd_data_common.hpp.FAAF76603FF7D70B.idx differ diff --git a/.cache/clangd/index/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp.17D6FDBA1D601B9A.idx b/.cache/clangd/index/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp.17D6FDBA1D601B9A.idx new file mode 100755 index 0000000000000000000000000000000000000000..362e15c797c29c6b1ce8b495364ff82aca874297 Binary files /dev/null and b/.cache/clangd/index/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp.17D6FDBA1D601B9A.idx differ diff --git a/.cache/clangd/index/convnd_bwd_data_xdl_fp16.cpp.32F247EF8BEF9A27.idx b/.cache/clangd/index/convnd_bwd_data_xdl_fp16.cpp.32F247EF8BEF9A27.idx new file mode 100755 index 0000000000000000000000000000000000000000..1dcb1319b1d62ac03144fa0acdd7a57f078b6124 Binary files /dev/null and b/.cache/clangd/index/convnd_bwd_data_xdl_fp16.cpp.32F247EF8BEF9A27.idx differ diff --git a/.cache/clangd/index/convnd_fwd.cpp.63437EF3B0E7154B.idx b/.cache/clangd/index/convnd_fwd.cpp.63437EF3B0E7154B.idx new file mode 100755 index 0000000000000000000000000000000000000000..9351ce43d998642cebf4b8463675b5c266f9ce0f Binary files /dev/null and b/.cache/clangd/index/convnd_fwd.cpp.63437EF3B0E7154B.idx differ diff --git a/.cache/clangd/index/convnd_fwd_activ_multi_ab_common.hpp.F16D50D450CBF542.idx b/.cache/clangd/index/convnd_fwd_activ_multi_ab_common.hpp.F16D50D450CBF542.idx new file mode 100755 index 0000000000000000000000000000000000000000..2ad3b26124e25d5d25cddb68438be04f0c45c0dc Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_activ_multi_ab_common.hpp.F16D50D450CBF542.idx differ diff --git a/.cache/clangd/index/convnd_fwd_activ_unary_common.hpp.A3C6C6A8E8602CD2.idx b/.cache/clangd/index/convnd_fwd_activ_unary_common.hpp.A3C6C6A8E8602CD2.idx new file mode 100755 index 0000000000000000000000000000000000000000..3274c2601cd68fd56671cca3550c8606a606b1e6 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_activ_unary_common.hpp.A3C6C6A8E8602CD2.idx differ diff --git a/.cache/clangd/index/convnd_fwd_common.hpp.1735C0647D85C158.idx b/.cache/clangd/index/convnd_fwd_common.hpp.1735C0647D85C158.idx new file mode 100755 index 0000000000000000000000000000000000000000..9dd712f57a7f1df28311dbffc9fff44dc9a6a6ac Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_common.hpp.1735C0647D85C158.idx differ diff --git a/.cache/clangd/index/convnd_fwd_max_xdl_bf16.cpp.32DD457450CC173D.idx b/.cache/clangd/index/convnd_fwd_max_xdl_bf16.cpp.32DD457450CC173D.idx new file mode 100755 index 0000000000000000000000000000000000000000..9f7e7ec21062f420950c299a6de77d190c816bc5 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_max_xdl_bf16.cpp.32DD457450CC173D.idx differ diff --git a/.cache/clangd/index/convnd_fwd_max_xdl_fp16.cpp.57DCB3A8914F4BCB.idx b/.cache/clangd/index/convnd_fwd_max_xdl_fp16.cpp.57DCB3A8914F4BCB.idx new file mode 100755 index 0000000000000000000000000000000000000000..983ca1342792db4c637acefb777b2f8d25e98ea5 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_max_xdl_fp16.cpp.57DCB3A8914F4BCB.idx differ diff --git a/.cache/clangd/index/convnd_fwd_max_xdl_fp32.cpp.7C22839582612580.idx b/.cache/clangd/index/convnd_fwd_max_xdl_fp32.cpp.7C22839582612580.idx new file mode 100755 index 0000000000000000000000000000000000000000..be88175ed34b966bc0e34815c8b634981fc8a9cb Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_max_xdl_fp32.cpp.7C22839582612580.idx differ diff --git a/.cache/clangd/index/convnd_fwd_max_xdl_int8.cpp.E84F6E18A0DCB8A3.idx b/.cache/clangd/index/convnd_fwd_max_xdl_int8.cpp.E84F6E18A0DCB8A3.idx new file mode 100755 index 0000000000000000000000000000000000000000..4f6fa22878dbb44c49345c236cabee1b29d14047 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_max_xdl_int8.cpp.E84F6E18A0DCB8A3.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_abs_fp16.cpp.B14E3C722763D4B8.idx b/.cache/clangd/index/convnd_fwd_xdl_abs_fp16.cpp.B14E3C722763D4B8.idx new file mode 100755 index 0000000000000000000000000000000000000000..338128dad4dea23272da777baf09e94cb04ccc25 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_abs_fp16.cpp.B14E3C722763D4B8.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_bf16.cpp.054A24EF5BFF4A1F.idx b/.cache/clangd/index/convnd_fwd_xdl_bf16.cpp.054A24EF5BFF4A1F.idx new file mode 100755 index 0000000000000000000000000000000000000000..26863ef07b85ccac67a6fbd97fce266a2aae2e5f Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_bf16.cpp.054A24EF5BFF4A1F.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_bilinear_residual_fp16.cpp.C4A66C697A8DEBDA.idx b/.cache/clangd/index/convnd_fwd_xdl_bilinear_residual_fp16.cpp.C4A66C697A8DEBDA.idx new file mode 100755 index 0000000000000000000000000000000000000000..f0d49e1faf60f0cd9a8147566c3e528ff6e93dd4 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_bilinear_residual_fp16.cpp.C4A66C697A8DEBDA.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_clippedrelu_fp16.cpp.4759FB8DCFC58E7D.idx b/.cache/clangd/index/convnd_fwd_xdl_clippedrelu_fp16.cpp.4759FB8DCFC58E7D.idx new file mode 100755 index 0000000000000000000000000000000000000000..bbb1713a154f91b4d7591f998a68fdfa3ba112a7 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_clippedrelu_fp16.cpp.4759FB8DCFC58E7D.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_elu_fp16.cpp.922AD9048F6922A8.idx b/.cache/clangd/index/convnd_fwd_xdl_elu_fp16.cpp.922AD9048F6922A8.idx new file mode 100755 index 0000000000000000000000000000000000000000..ec61376da5fb72c1f3d225aedbe9dd8e3114d2b9 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_elu_fp16.cpp.922AD9048F6922A8.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_fp16.cpp.E044D0CD44CCFE7F.idx b/.cache/clangd/index/convnd_fwd_xdl_fp16.cpp.E044D0CD44CCFE7F.idx new file mode 100755 index 0000000000000000000000000000000000000000..4986ae43f4974898209c5bc9843cb262c56594b2 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_fp16.cpp.E044D0CD44CCFE7F.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_fp32.cpp.911AF34195327474.idx b/.cache/clangd/index/convnd_fwd_xdl_fp32.cpp.911AF34195327474.idx new file mode 100755 index 0000000000000000000000000000000000000000..78aa926495a40ef8d84f4d9c64005c65a073925b Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_fp32.cpp.911AF34195327474.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_fp64.cpp.3D0123C922DFAFA2.idx b/.cache/clangd/index/convnd_fwd_xdl_fp64.cpp.3D0123C922DFAFA2.idx new file mode 100755 index 0000000000000000000000000000000000000000..97c2ae2bcbfb6203d1fe200b18c664c98bb44fde Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_fp64.cpp.3D0123C922DFAFA2.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_int8.cpp.1BF1AFB682499568.idx b/.cache/clangd/index/convnd_fwd_xdl_int8.cpp.1BF1AFB682499568.idx new file mode 100755 index 0000000000000000000000000000000000000000..6ca16ad7bbb4033e92b7397c9b3fe02f4847dbda Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_int8.cpp.1BF1AFB682499568.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_leakyrelu_fp16.cpp.BA4E6713D7C74F09.idx b/.cache/clangd/index/convnd_fwd_xdl_leakyrelu_fp16.cpp.BA4E6713D7C74F09.idx new file mode 100755 index 0000000000000000000000000000000000000000..9f7c2c4e4880beb6b958abb44be9b8a044b60eee Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_leakyrelu_fp16.cpp.BA4E6713D7C74F09.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_pow_fp16.cpp.0F662794CF8A0DB2.idx b/.cache/clangd/index/convnd_fwd_xdl_pow_fp16.cpp.0F662794CF8A0DB2.idx new file mode 100755 index 0000000000000000000000000000000000000000..bd633b7c76a7bbcd9e5e93cb6b4b756eca5db3cd Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_pow_fp16.cpp.0F662794CF8A0DB2.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_relu_fp16.cpp.8F7B959CCFC1E66F.idx b/.cache/clangd/index/convnd_fwd_xdl_relu_fp16.cpp.8F7B959CCFC1E66F.idx new file mode 100755 index 0000000000000000000000000000000000000000..5abe29475beebcdb9151af66ffaf3d1b72ff6918 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_relu_fp16.cpp.8F7B959CCFC1E66F.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp.54C5613FFA1E2460.idx b/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp.54C5613FFA1E2460.idx new file mode 100755 index 0000000000000000000000000000000000000000..0d5098787953eb9e790ba318c37e89f68d9a6186 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp.54C5613FFA1E2460.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp.303711D8C093AB3A.idx b/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp.303711D8C093AB3A.idx new file mode 100755 index 0000000000000000000000000000000000000000..c21fab0457bb98c8180dae2580fba37a752acf31 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp.303711D8C093AB3A.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_sigmoid_fp16.cpp.7462E7274E7DC93D.idx b/.cache/clangd/index/convnd_fwd_xdl_sigmoid_fp16.cpp.7462E7274E7DC93D.idx new file mode 100755 index 0000000000000000000000000000000000000000..92cf32bb29a413143a68c62008085379738aa01b Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_sigmoid_fp16.cpp.7462E7274E7DC93D.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_softrelu_fp16.cpp.47B66AF94BC3AD6A.idx b/.cache/clangd/index/convnd_fwd_xdl_softrelu_fp16.cpp.47B66AF94BC3AD6A.idx new file mode 100755 index 0000000000000000000000000000000000000000..dc4161aaedefcae9ea81c864742531522a3c5ad6 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_softrelu_fp16.cpp.47B66AF94BC3AD6A.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_tanh_fp16.cpp.A1CCA35C668C7A32.idx b/.cache/clangd/index/convnd_fwd_xdl_tanh_fp16.cpp.A1CCA35C668C7A32.idx new file mode 100755 index 0000000000000000000000000000000000000000..ab8d38683ff6c6a70161c6360a0d273ce53086a9 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_tanh_fp16.cpp.A1CCA35C668C7A32.idx differ diff --git a/.cache/clangd/index/convolution_backward_data.hpp.F488158732BCE944.idx b/.cache/clangd/index/convolution_backward_data.hpp.F488158732BCE944.idx new file mode 100755 index 0000000000000000000000000000000000000000..c5dbc82176418fed94741f95d7a27f40c8b462e1 Binary files /dev/null and b/.cache/clangd/index/convolution_backward_data.hpp.F488158732BCE944.idx differ diff --git a/.cache/clangd/index/convolution_backward_data_specialization.hpp.E2F7DA4511290758.idx b/.cache/clangd/index/convolution_backward_data_specialization.hpp.E2F7DA4511290758.idx new file mode 100755 index 0000000000000000000000000000000000000000..6b8e8c6271e93d2c8675bbbbced4d14de8f74a8e Binary files /dev/null and b/.cache/clangd/index/convolution_backward_data_specialization.hpp.E2F7DA4511290758.idx differ diff --git a/.cache/clangd/index/convolution_backward_weight_specialization.hpp.D1A61821B9657E4F.idx b/.cache/clangd/index/convolution_backward_weight_specialization.hpp.D1A61821B9657E4F.idx new file mode 100755 index 0000000000000000000000000000000000000000..eca7cb0f26e6c2a8ddf2214505738d8e3f7d0443 Binary files /dev/null and b/.cache/clangd/index/convolution_backward_weight_specialization.hpp.D1A61821B9657E4F.idx differ diff --git a/.cache/clangd/index/convolution_forward.hpp.8E3EE29C519B7A7E.idx b/.cache/clangd/index/convolution_forward.hpp.8E3EE29C519B7A7E.idx new file mode 100755 index 0000000000000000000000000000000000000000..0882c97343f46feaea6eb49da45d751de553e366 Binary files /dev/null and b/.cache/clangd/index/convolution_forward.hpp.8E3EE29C519B7A7E.idx differ diff --git a/.cache/clangd/index/convolution_forward_specialization.hpp.29B75FDB4D9A7ED7.idx b/.cache/clangd/index/convolution_forward_specialization.hpp.29B75FDB4D9A7ED7.idx new file mode 100755 index 0000000000000000000000000000000000000000..f2c450731f828dcfa918f4fcfa3fcfacd04d3fbd Binary files /dev/null and b/.cache/clangd/index/convolution_forward_specialization.hpp.29B75FDB4D9A7ED7.idx differ diff --git a/.cache/clangd/index/convolution_host_tensor_descriptor_helper.hpp.C7E0FAE3D228F16B.idx b/.cache/clangd/index/convolution_host_tensor_descriptor_helper.hpp.C7E0FAE3D228F16B.idx new file mode 100755 index 0000000000000000000000000000000000000000..0bd93c1507d1e0d545cc35d7521cbda5456923e6 Binary files /dev/null and b/.cache/clangd/index/convolution_host_tensor_descriptor_helper.hpp.C7E0FAE3D228F16B.idx differ diff --git a/.cache/clangd/index/convolution_parameter.cpp.AB24172BD3A32784.idx b/.cache/clangd/index/convolution_parameter.cpp.AB24172BD3A32784.idx new file mode 100755 index 0000000000000000000000000000000000000000..386438875cd57d626260c52417e57c52caf2ec7b Binary files /dev/null and b/.cache/clangd/index/convolution_parameter.cpp.AB24172BD3A32784.idx differ diff --git a/.cache/clangd/index/convolution_parameter.hpp.98A4E91AB8FFEC38.idx b/.cache/clangd/index/convolution_parameter.hpp.98A4E91AB8FFEC38.idx new file mode 100755 index 0000000000000000000000000000000000000000..ae7630697725e62560cc58e42c83dffe80828536 Binary files /dev/null and b/.cache/clangd/index/convolution_parameter.hpp.98A4E91AB8FFEC38.idx differ diff --git a/.cache/clangd/index/copy.hpp.D47879DBFF83B386.idx b/.cache/clangd/index/copy.hpp.D47879DBFF83B386.idx new file mode 100755 index 0000000000000000000000000000000000000000..2101734dc29eb0af12376b4655d9f02581222f76 Binary files /dev/null and b/.cache/clangd/index/copy.hpp.D47879DBFF83B386.idx differ diff --git a/.cache/clangd/index/data_type.hpp.FA421D61CFEE8D86.idx b/.cache/clangd/index/data_type.hpp.FA421D61CFEE8D86.idx new file mode 100755 index 0000000000000000000000000000000000000000..dd885df2c0913a1ce47a716ab2b79efc99c45da5 Binary files /dev/null and b/.cache/clangd/index/data_type.hpp.FA421D61CFEE8D86.idx differ diff --git a/.cache/clangd/index/data_type_enum.hpp.9C984CB93A76A458.idx b/.cache/clangd/index/data_type_enum.hpp.9C984CB93A76A458.idx new file mode 100755 index 0000000000000000000000000000000000000000..2592dc47562aed3a56924d64355f35fcf138f7b6 Binary files /dev/null and b/.cache/clangd/index/data_type_enum.hpp.9C984CB93A76A458.idx differ diff --git a/.cache/clangd/index/debug.hpp.3D8E2A10AC86C578.idx b/.cache/clangd/index/debug.hpp.3D8E2A10AC86C578.idx new file mode 100755 index 0000000000000000000000000000000000000000..9c074be25ada10deac695b36d421c011a1625a9d Binary files /dev/null and b/.cache/clangd/index/debug.hpp.3D8E2A10AC86C578.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp.8DD0CEE151EEE78C.idx b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp.8DD0CEE151EEE78C.idx new file mode 100755 index 0000000000000000000000000000000000000000..8fcd998f419c14c9b48a00f1f15d9f1c1b72347c Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp.8DD0CEE151EEE78C.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp.E45DD97792659A1B.idx b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp.E45DD97792659A1B.idx new file mode 100755 index 0000000000000000000000000000000000000000..c0cba2ba8a5d6ea55c39460ed82103f63ed98f0b Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp.E45DD97792659A1B.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp.0A50ACDD7655DC8C.idx b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp.0A50ACDD7655DC8C.idx new file mode 100755 index 0000000000000000000000000000000000000000..8c27027b2d1f229ef1e6239cb6367c2d503f759a Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp.0A50ACDD7655DC8C.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp.C65ED37BE0626399.idx b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp.C65ED37BE0626399.idx new file mode 100755 index 0000000000000000000000000000000000000000..114a030838de07a7899c382ddedcbf5e495d1eb2 Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp.C65ED37BE0626399.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp.322ABBBE306F5168.idx b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp.322ABBBE306F5168.idx new file mode 100755 index 0000000000000000000000000000000000000000..8f3ecd0d73d45cd2ba6096c95b34df5cde3887b0 Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp.322ABBBE306F5168.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp.BE9138723EE82CA7.idx b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp.BE9138723EE82CA7.idx new file mode 100755 index 0000000000000000000000000000000000000000..6b6704811cc3f17ccd9942704529e1c99de6d375 Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp.BE9138723EE82CA7.idx differ diff --git a/.cache/clangd/index/device_avgpool3d_bwd_ndhwc_ndhwc.hpp.B8CFD83A7F04917D.idx b/.cache/clangd/index/device_avgpool3d_bwd_ndhwc_ndhwc.hpp.B8CFD83A7F04917D.idx new file mode 100755 index 0000000000000000000000000000000000000000..a94d1d3e3f7a5d45a2d41c5e32d46c8c4ecc69c2 Binary files /dev/null and b/.cache/clangd/index/device_avgpool3d_bwd_ndhwc_ndhwc.hpp.B8CFD83A7F04917D.idx differ diff --git a/.cache/clangd/index/device_avgpool_bwd.hpp.65CD0007480F503D.idx b/.cache/clangd/index/device_avgpool_bwd.hpp.65CD0007480F503D.idx new file mode 100755 index 0000000000000000000000000000000000000000..5732d6b8eaae7c71439e2c1cd70237eeb4078cb9 Binary files /dev/null and b/.cache/clangd/index/device_avgpool_bwd.hpp.65CD0007480F503D.idx differ diff --git a/.cache/clangd/index/device_base.hpp.07667515082DA1A8.idx b/.cache/clangd/index/device_base.hpp.07667515082DA1A8.idx new file mode 100755 index 0000000000000000000000000000000000000000..4bba0f4cd960cb41e6daaba2cdf95d318440aae0 Binary files /dev/null and b/.cache/clangd/index/device_base.hpp.07667515082DA1A8.idx differ diff --git a/.cache/clangd/index/device_batched_contraction_multiple_d.hpp.C066C76CB17254D1.idx b/.cache/clangd/index/device_batched_contraction_multiple_d.hpp.C066C76CB17254D1.idx new file mode 100755 index 0000000000000000000000000000000000000000..31dcb6393c81cffd0271fef0ff2372cc2b51ae2b Binary files /dev/null and b/.cache/clangd/index/device_batched_contraction_multiple_d.hpp.C066C76CB17254D1.idx differ diff --git a/.cache/clangd/index/device_batched_contraction_multiple_d_xdl_cshuffle.hpp.9FB9459677B68FA8.idx b/.cache/clangd/index/device_batched_contraction_multiple_d_xdl_cshuffle.hpp.9FB9459677B68FA8.idx new file mode 100755 index 0000000000000000000000000000000000000000..543307e8fa5074db8382e594f18aea9c04031483 Binary files /dev/null and b/.cache/clangd/index/device_batched_contraction_multiple_d_xdl_cshuffle.hpp.9FB9459677B68FA8.idx differ diff --git a/.cache/clangd/index/device_batched_gemm.hpp.0D6DC4AA71BF6793.idx b/.cache/clangd/index/device_batched_gemm.hpp.0D6DC4AA71BF6793.idx new file mode 100755 index 0000000000000000000000000000000000000000..742f35e896ad2ede665fb8be300be6779482bea7 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm.hpp.0D6DC4AA71BF6793.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.2CCE15A52A6097D9.idx b/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.2CCE15A52A6097D9.idx new file mode 100755 index 0000000000000000000000000000000000000000..a15625dc53d6809026cf4c28be8076ea2e70949c Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.2CCE15A52A6097D9.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.09BE891BDB3DB7E7.idx b/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.09BE891BDB3DB7E7.idx new file mode 100755 index 0000000000000000000000000000000000000000..264f59322027da78d6adfbc63be19c673e9f898d Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.09BE891BDB3DB7E7.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp.FE05EE1022E21C41.idx b/.cache/clangd/index/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp.FE05EE1022E21C41.idx new file mode 100755 index 0000000000000000000000000000000000000000..622ee5e30fd21bcccc55b23f45e936fb4ac01857 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp.FE05EE1022E21C41.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.5255F78DB2565F3C.idx b/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.5255F78DB2565F3C.idx new file mode 100755 index 0000000000000000000000000000000000000000..86617bc719968eaa23eff3b9499e3acc6e54211f Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.5255F78DB2565F3C.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.B4883A86B075066E.idx b/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.B4883A86B075066E.idx new file mode 100755 index 0000000000000000000000000000000000000000..a0adfc213a0231c7b592b547a37f3d106deae15b Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.B4883A86B075066E.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_gemm.hpp.FDFBA95BB6D583D3.idx b/.cache/clangd/index/device_batched_gemm_gemm.hpp.FDFBA95BB6D583D3.idx new file mode 100755 index 0000000000000000000000000000000000000000..c48179452a27ea1d886f4c181a58728d64ca3048 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_gemm.hpp.FDFBA95BB6D583D3.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle.hpp.8AECE81C6A31D6CE.idx b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle.hpp.8AECE81C6A31D6CE.idx new file mode 100755 index 0000000000000000000000000000000000000000..e2dd29da73b90b93e8cf6f6e6b8fb70895533927 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle.hpp.8AECE81C6A31D6CE.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.24C81AB80A523E1D.idx b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.24C81AB80A523E1D.idx new file mode 100755 index 0000000000000000000000000000000000000000..bf1a54953606e1317d75bd513e71f598351a7e67 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.24C81AB80A523E1D.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.42B3A408DD25BE6F.idx b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.42B3A408DD25BE6F.idx new file mode 100755 index 0000000000000000000000000000000000000000..16d2c7f352d7026ddc8e252df1c1f26461a0dd3c Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.42B3A408DD25BE6F.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_multi_d.hpp.65CBFD3787EC8604.idx b/.cache/clangd/index/device_batched_gemm_multi_d.hpp.65CBFD3787EC8604.idx new file mode 100755 index 0000000000000000000000000000000000000000..81cd5060a5fa67cba6695cf079162b8c73e37305 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_multi_d.hpp.65CBFD3787EC8604.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_multi_d_xdl.hpp.5F31589668DB0CDD.idx b/.cache/clangd/index/device_batched_gemm_multi_d_xdl.hpp.5F31589668DB0CDD.idx new file mode 100755 index 0000000000000000000000000000000000000000..cf9895f7830edcfec836354f11d942e3e0ece2c8 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_multi_d_xdl.hpp.5F31589668DB0CDD.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d.hpp.1EC37BB5D09F3746.idx b/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d.hpp.1EC37BB5D09F3746.idx new file mode 100755 index 0000000000000000000000000000000000000000..35fe9c96b67e3a3e58751c7ec4abd8c1c2699e83 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d.hpp.1EC37BB5D09F3746.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp.1D083EFB833A8BDD.idx b/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp.1D083EFB833A8BDD.idx new file mode 100755 index 0000000000000000000000000000000000000000..5163a6be3f5b749f7f72b1e3919524b4cb08274c Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp.1D083EFB833A8BDD.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle.hpp.008517D2C19930C3.idx b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle.hpp.008517D2C19930C3.idx new file mode 100755 index 0000000000000000000000000000000000000000..4420018cd8986a90a01c8cbc7815c3a318d51b9f Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle.hpp.008517D2C19930C3.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.3CE3CBFFC33E63B5.idx b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.3CE3CBFFC33E63B5.idx new file mode 100755 index 0000000000000000000000000000000000000000..c279fc209ce70aad4beb16a5a0844674ae62e156 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.3CE3CBFFC33E63B5.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.C2D6CBC7BDD03D15.idx b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.C2D6CBC7BDD03D15.idx new file mode 100755 index 0000000000000000000000000000000000000000..9ee5aec10f6b9ed47e7f4296de854b627d3cbc05 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.C2D6CBC7BDD03D15.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.BE70F5327F5BF3ED.idx b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.BE70F5327F5BF3ED.idx new file mode 100755 index 0000000000000000000000000000000000000000..0b8a348b57f07ec9b11e5f036a7f10a9c1416a5a Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.BE70F5327F5BF3ED.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.542AEDD871493BF1.idx b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.542AEDD871493BF1.idx new file mode 100755 index 0000000000000000000000000000000000000000..3a3f796fdabe954eae49d2520e155e536448c38b Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.542AEDD871493BF1.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm.hpp.2CF55436E05C96D5.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm.hpp.2CF55436E05C96D5.idx new file mode 100755 index 0000000000000000000000000000000000000000..02b70043f1aa046371ace09e12187b3b835207c0 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm.hpp.2CF55436E05C96D5.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute.hpp.0F50C2899670001D.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute.hpp.0F50C2899670001D.idx new file mode 100755 index 0000000000000000000000000000000000000000..2e0801fdacf6a40e99de0a25b92fea8403ea1942 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute.hpp.0F50C2899670001D.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.72B4F2954B40EC19.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.72B4F2954B40EC19.idx new file mode 100755 index 0000000000000000000000000000000000000000..06b1588d7d1ec2371471223d265cebc332628cc9 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.72B4F2954B40EC19.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.CA655EDB759D211C.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.CA655EDB759D211C.idx new file mode 100755 index 0000000000000000000000000000000000000000..b2277470981902e94fa848bc8c5cb3616d03187d Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.CA655EDB759D211C.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.54BCAA9E5C4C7CA0.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.54BCAA9E5C4C7CA0.idx new file mode 100755 index 0000000000000000000000000000000000000000..be8b2c55690f9f26a1fe60b536a152127857a53a Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.54BCAA9E5C4C7CA0.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp.5F249F65D9D49000.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp.5F249F65D9D49000.idx new file mode 100755 index 0000000000000000000000000000000000000000..9eb0989e92b0f0869f7d1a8273bd6472d4a96f06 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp.5F249F65D9D49000.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.9E1D81891D4D775B.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.9E1D81891D4D775B.idx new file mode 100755 index 0000000000000000000000000000000000000000..dd80163482feef2e98357fb5cef9232639fed717 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.9E1D81891D4D775B.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl.hpp.336993554588B91D.idx b/.cache/clangd/index/device_batched_gemm_xdl.hpp.336993554588B91D.idx new file mode 100755 index 0000000000000000000000000000000000000000..b4ddce7eb640e15f8df6c21126a6655cc2fcefc2 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl.hpp.336993554588B91D.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp.01B3A3CE0FB07AA4.idx b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp.01B3A3CE0FB07AA4.idx new file mode 100755 index 0000000000000000000000000000000000000000..5399de0cb8fc7406235beab0b408115456933611 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp.01B3A3CE0FB07AA4.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp.6732FB26500DDC13.idx b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp.6732FB26500DDC13.idx new file mode 100755 index 0000000000000000000000000000000000000000..eab90d28fcc767e11770b12255f5197576421348 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp.6732FB26500DDC13.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp.5C50F8886E987210.idx b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp.5C50F8886E987210.idx new file mode 100755 index 0000000000000000000000000000000000000000..e43f5a3ec057d679362524e9ae18fea1ce94644c Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp.5C50F8886E987210.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp.991CBBA160B6AE8A.idx b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp.991CBBA160B6AE8A.idx new file mode 100755 index 0000000000000000000000000000000000000000..e003ae8b04f2338a47a17dfb28bf0e3e0b7c9220 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp.991CBBA160B6AE8A.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp.909733D52FF49021.idx b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp.909733D52FF49021.idx new file mode 100755 index 0000000000000000000000000000000000000000..6c8c17ba4e2938975c8324daeb347a9c6596a12a Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp.909733D52FF49021.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp.EF1B1B5023E691DD.idx b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp.EF1B1B5023E691DD.idx new file mode 100755 index 0000000000000000000000000000000000000000..ff38a02f7736442c10eac6526e71a26679961c64 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp.EF1B1B5023E691DD.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp.6191F299E76FA25A.idx b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp.6191F299E76FA25A.idx new file mode 100755 index 0000000000000000000000000000000000000000..28f819c0fa917cd0c536d699661d0ca793a670a6 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp.6191F299E76FA25A.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp.1378958A5E1CC674.idx b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp.1378958A5E1CC674.idx new file mode 100755 index 0000000000000000000000000000000000000000..0141c037b8b451ac6757dbef814ff51463cc8df7 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp.1378958A5E1CC674.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp.155CED9F59B77387.idx b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp.155CED9F59B77387.idx new file mode 100755 index 0000000000000000000000000000000000000000..d8138e15dd309456bc58999fb46f9e7841b8f9cf Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp.155CED9F59B77387.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp.021E4B84540D9AAD.idx b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp.021E4B84540D9AAD.idx new file mode 100755 index 0000000000000000000000000000000000000000..28ab0b9f44f9d0547c54c87efc9651fb4df52aa8 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp.021E4B84540D9AAD.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp.2D0A22E72C5CD4C0.idx b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp.2D0A22E72C5CD4C0.idx new file mode 100755 index 0000000000000000000000000000000000000000..cd5c1f4cafe6f13a45ff0fffd09ddd6f015d6024 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp.2D0A22E72C5CD4C0.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp.6662845BB6111F27.idx b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp.6662845BB6111F27.idx new file mode 100755 index 0000000000000000000000000000000000000000..3f50ada9877801be3408953cf6b517979d9918ae Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp.6662845BB6111F27.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp.3B9F48B295268F74.idx b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp.3B9F48B295268F74.idx new file mode 100755 index 0000000000000000000000000000000000000000..35c931eb6297a5753378d0385ed591b5c8e944c7 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp.3B9F48B295268F74.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp.0C31B57B423E19C5.idx b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp.0C31B57B423E19C5.idx new file mode 100755 index 0000000000000000000000000000000000000000..446385c8b746e88adfe0a4d2a2874e176bc500cf Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp.0C31B57B423E19C5.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp.8439FC102CF881D2.idx b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp.8439FC102CF881D2.idx new file mode 100755 index 0000000000000000000000000000000000000000..fb5dbff092b9148742e6fdabe20a68fff520e588 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp.8439FC102CF881D2.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp.9E2B022D116E5337.idx b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp.9E2B022D116E5337.idx new file mode 100755 index 0000000000000000000000000000000000000000..34c8066366ba5dcbfd74f39a69d356366a8cb4f2 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp.9E2B022D116E5337.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward.hpp.43E337CEA4624B30.idx b/.cache/clangd/index/device_batchnorm_backward.hpp.43E337CEA4624B30.idx new file mode 100755 index 0000000000000000000000000000000000000000..deb2d76cdfa77318a3c46f9480fb50e8d3bddfef Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward.hpp.43E337CEA4624B30.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward_bf16_instance.cpp.CEBDED0AD1292F09.idx b/.cache/clangd/index/device_batchnorm_backward_bf16_instance.cpp.CEBDED0AD1292F09.idx new file mode 100755 index 0000000000000000000000000000000000000000..a5a01a45fb4db913fb55693086641696fadab140 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward_bf16_instance.cpp.CEBDED0AD1292F09.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward_f16_instance.cpp.D1D5284FB6757AF3.idx b/.cache/clangd/index/device_batchnorm_backward_f16_instance.cpp.D1D5284FB6757AF3.idx new file mode 100755 index 0000000000000000000000000000000000000000..fd4300cc770c9a257fa3143b663cee2a08223740 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward_f16_instance.cpp.D1D5284FB6757AF3.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward_f32_instance.cpp.CA2117C76C8B866B.idx b/.cache/clangd/index/device_batchnorm_backward_f32_instance.cpp.CA2117C76C8B866B.idx new file mode 100755 index 0000000000000000000000000000000000000000..f574f4832e5a9e2b0b3b455deba21effdbb0530c Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward_f32_instance.cpp.CA2117C76C8B866B.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward_f64_instance.cpp.B99A50DCD60DF309.idx b/.cache/clangd/index/device_batchnorm_backward_f64_instance.cpp.B99A50DCD60DF309.idx new file mode 100755 index 0000000000000000000000000000000000000000..dadb38ffcb2b204684c6619e9880983a41d7c434 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward_f64_instance.cpp.B99A50DCD60DF309.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward_impl.hpp.8B916A92DFA8139E.idx b/.cache/clangd/index/device_batchnorm_backward_impl.hpp.8B916A92DFA8139E.idx new file mode 100755 index 0000000000000000000000000000000000000000..7caed0647c0dbcbf1131e80e8d7417373c0858bb Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward_impl.hpp.8B916A92DFA8139E.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward.hpp.0E1593921379F2F3.idx b/.cache/clangd/index/device_batchnorm_forward.hpp.0E1593921379F2F3.idx new file mode 100755 index 0000000000000000000000000000000000000000..2141d90fc1678cebdda1297d7d1d1ee86cff003c Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward.hpp.0E1593921379F2F3.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_bf16_instance.cpp.F51389A9E014ED43.idx b/.cache/clangd/index/device_batchnorm_forward_bf16_instance.cpp.F51389A9E014ED43.idx new file mode 100755 index 0000000000000000000000000000000000000000..f7789255f3753ca5a2022997185e1d3b07717c4e Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_bf16_instance.cpp.F51389A9E014ED43.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_f16_instance.cpp.5B31F4805B2AC1BA.idx b/.cache/clangd/index/device_batchnorm_forward_f16_instance.cpp.5B31F4805B2AC1BA.idx new file mode 100755 index 0000000000000000000000000000000000000000..f191e56fa2764321f2d283b2dd9c64c573dc6dc0 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_f16_instance.cpp.5B31F4805B2AC1BA.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_f32_instance.cpp.DAA81C77B8C2B284.idx b/.cache/clangd/index/device_batchnorm_forward_f32_instance.cpp.DAA81C77B8C2B284.idx new file mode 100755 index 0000000000000000000000000000000000000000..52a0fe98471217ea0a8e48398411b440c8f956e6 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_f32_instance.cpp.DAA81C77B8C2B284.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_f64_instance.cpp.B473866A874EE221.idx b/.cache/clangd/index/device_batchnorm_forward_f64_instance.cpp.B473866A874EE221.idx new file mode 100755 index 0000000000000000000000000000000000000000..b45020cf829dc5eabe4376915454ff3040f980a6 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_f64_instance.cpp.B473866A874EE221.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_impl.hpp.6D2CA9CBDDC84AC3.idx b/.cache/clangd/index/device_batchnorm_forward_impl.hpp.6D2CA9CBDDC84AC3.idx new file mode 100755 index 0000000000000000000000000000000000000000..6491ff3a2142ed430fb58c81dea917bc3add75e8 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_impl.hpp.6D2CA9CBDDC84AC3.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_impl_obsolete.hpp.7A0467126AEB6502.idx b/.cache/clangd/index/device_batchnorm_forward_impl_obsolete.hpp.7A0467126AEB6502.idx new file mode 100755 index 0000000000000000000000000000000000000000..3c5031a79ed0dbab67a58be681e0b4b8b1b534ae Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_impl_obsolete.hpp.7A0467126AEB6502.idx differ diff --git a/.cache/clangd/index/device_batchnorm_infer.hpp.4EDE573CB1DF1297.idx b/.cache/clangd/index/device_batchnorm_infer.hpp.4EDE573CB1DF1297.idx new file mode 100755 index 0000000000000000000000000000000000000000..dfc042df11c3761c919a776fd1e036519c4a055f Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_infer.hpp.4EDE573CB1DF1297.idx differ diff --git a/.cache/clangd/index/device_batchnorm_infer_bf16_instance.cpp.FEC3D08EDEA4CE6B.idx b/.cache/clangd/index/device_batchnorm_infer_bf16_instance.cpp.FEC3D08EDEA4CE6B.idx new file mode 100755 index 0000000000000000000000000000000000000000..a15dc51107ee8182145faad40bf3fd9955b36395 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_infer_bf16_instance.cpp.FEC3D08EDEA4CE6B.idx differ diff --git a/.cache/clangd/index/device_batchnorm_infer_f16_instance.cpp.B01DECCDEE63A78A.idx b/.cache/clangd/index/device_batchnorm_infer_f16_instance.cpp.B01DECCDEE63A78A.idx new file mode 100755 index 0000000000000000000000000000000000000000..8eb86213b12319c9890628a003246ab6682e6db3 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_infer_f16_instance.cpp.B01DECCDEE63A78A.idx differ diff --git a/.cache/clangd/index/device_batchnorm_infer_f32_instance.cpp.889B58511EB200D7.idx b/.cache/clangd/index/device_batchnorm_infer_f32_instance.cpp.889B58511EB200D7.idx new file mode 100755 index 0000000000000000000000000000000000000000..ef2737ba86e60c324aad3d1bcdff993db8ef6ccc Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_infer_f32_instance.cpp.889B58511EB200D7.idx differ diff --git a/.cache/clangd/index/device_batchnorm_infer_f64_instance.cpp.0266490535848A23.idx b/.cache/clangd/index/device_batchnorm_infer_f64_instance.cpp.0266490535848A23.idx new file mode 100755 index 0000000000000000000000000000000000000000..400f2f459d4b718482510a32d16aec90836af633 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_infer_f64_instance.cpp.0266490535848A23.idx differ diff --git a/.cache/clangd/index/device_cgemm.hpp.CD6CA3F05D9AAF4D.idx b/.cache/clangd/index/device_cgemm.hpp.CD6CA3F05D9AAF4D.idx new file mode 100755 index 0000000000000000000000000000000000000000..2464a976a0a640aacf49077baede3981b217b5cd Binary files /dev/null and b/.cache/clangd/index/device_cgemm.hpp.CD6CA3F05D9AAF4D.idx differ diff --git a/.cache/clangd/index/device_cgemm_4gemm_xdl_cshuffle.hpp.CB4931718165FBCB.idx b/.cache/clangd/index/device_cgemm_4gemm_xdl_cshuffle.hpp.CB4931718165FBCB.idx new file mode 100755 index 0000000000000000000000000000000000000000..f72ee91a34add4bd2fb28b52d7742f549abe44e0 Binary files /dev/null and b/.cache/clangd/index/device_cgemm_4gemm_xdl_cshuffle.hpp.CB4931718165FBCB.idx differ diff --git a/.cache/clangd/index/device_column_to_image_gndhwc_3d_instance.cpp.8AF56F1E7AD14DC6.idx b/.cache/clangd/index/device_column_to_image_gndhwc_3d_instance.cpp.8AF56F1E7AD14DC6.idx new file mode 100755 index 0000000000000000000000000000000000000000..9acab2618ad9eff6d3b3ff50964e9f90cdd60089 Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_gndhwc_3d_instance.cpp.8AF56F1E7AD14DC6.idx differ diff --git a/.cache/clangd/index/device_column_to_image_gnhwc_2d_instance.cpp.8EBF74F132E896AB.idx b/.cache/clangd/index/device_column_to_image_gnhwc_2d_instance.cpp.8EBF74F132E896AB.idx new file mode 100755 index 0000000000000000000000000000000000000000..4d6db89ae97000e1d76d5f0a4a8f2adb8306e341 Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_gnhwc_2d_instance.cpp.8EBF74F132E896AB.idx differ diff --git a/.cache/clangd/index/device_column_to_image_gnwc_1d_instance.cpp.C6C85A0C333E012A.idx b/.cache/clangd/index/device_column_to_image_gnwc_1d_instance.cpp.C6C85A0C333E012A.idx new file mode 100755 index 0000000000000000000000000000000000000000..cdba2909702006c3e7e9e2c20ae610ffcf2aaecf Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_gnwc_1d_instance.cpp.C6C85A0C333E012A.idx differ diff --git a/.cache/clangd/index/device_column_to_image_impl.hpp.545E1241F8BEB14B.idx b/.cache/clangd/index/device_column_to_image_impl.hpp.545E1241F8BEB14B.idx new file mode 100755 index 0000000000000000000000000000000000000000..940df9a9afd1d13838d4ab44ded568abb54fc218 Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_impl.hpp.545E1241F8BEB14B.idx differ diff --git a/.cache/clangd/index/device_column_to_image_instance.hpp.FA1AC606D0CD0897.idx b/.cache/clangd/index/device_column_to_image_instance.hpp.FA1AC606D0CD0897.idx new file mode 100755 index 0000000000000000000000000000000000000000..ae0eb0a2aa5427090feb265b2935ff2ba0a9660b Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_instance.hpp.FA1AC606D0CD0897.idx differ diff --git a/.cache/clangd/index/device_column_to_image_ndhwgc_3d_instance.cpp.CBDECCEC6F806814.idx b/.cache/clangd/index/device_column_to_image_ndhwgc_3d_instance.cpp.CBDECCEC6F806814.idx new file mode 100755 index 0000000000000000000000000000000000000000..2651b4fc8fbd7d5b2758bdfa60baf63438ddc3d2 Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_ndhwgc_3d_instance.cpp.CBDECCEC6F806814.idx differ diff --git a/.cache/clangd/index/device_column_to_image_nhwgc_2d_instance.cpp.9A3328B90E30CC03.idx b/.cache/clangd/index/device_column_to_image_nhwgc_2d_instance.cpp.9A3328B90E30CC03.idx new file mode 100755 index 0000000000000000000000000000000000000000..94dad8f64f2bd8220f718e6b7b33e2de244d1c70 Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_nhwgc_2d_instance.cpp.9A3328B90E30CC03.idx differ diff --git a/.cache/clangd/index/device_column_to_image_nwgc_1d_instance.cpp.163F7E8C2077B0E0.idx b/.cache/clangd/index/device_column_to_image_nwgc_1d_instance.cpp.163F7E8C2077B0E0.idx new file mode 100755 index 0000000000000000000000000000000000000000..a90d1c9068025c9492535a13286ac31ddbc3270e Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_nwgc_1d_instance.cpp.163F7E8C2077B0E0.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.79A78F49E52676FD.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.79A78F49E52676FD.idx new file mode 100755 index 0000000000000000000000000000000000000000..b75711cca43cc1a5911045e402e6b70872c0fcdb Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.79A78F49E52676FD.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.DB04A314807BEF1C.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.DB04A314807BEF1C.idx new file mode 100755 index 0000000000000000000000000000000000000000..ac82ae08bb5af14c7c8919bc188a486b139a0abf Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.DB04A314807BEF1C.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.E624632F1E715961.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.E624632F1E715961.idx new file mode 100755 index 0000000000000000000000000000000000000000..c2aa82d390b3b8f61d417be73f909e3e3cc13b1a Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.E624632F1E715961.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.CACB611E1CE9F1B5.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.CACB611E1CE9F1B5.idx new file mode 100755 index 0000000000000000000000000000000000000000..f266e053ca5584ed9fa9fc69ee52b7b5bf7dfd4f Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.CACB611E1CE9F1B5.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.9E3DEE644D650E5A.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.9E3DEE644D650E5A.idx new file mode 100755 index 0000000000000000000000000000000000000000..c09d48887d1f4ffa36e2fed0b47d3915987aa186 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.9E3DEE644D650E5A.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.384AC715F1A68557.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.384AC715F1A68557.idx new file mode 100755 index 0000000000000000000000000000000000000000..f6f9f8744f09e442171a2c072c694f0f81ac1d40 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.384AC715F1A68557.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.66D5EE934903111A.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.66D5EE934903111A.idx new file mode 100755 index 0000000000000000000000000000000000000000..d83bd12f790f31b8ae686478970c869c73997e80 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.66D5EE934903111A.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.8194F48DCC1F513B.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.8194F48DCC1F513B.idx new file mode 100755 index 0000000000000000000000000000000000000000..c83594015d663dae4e086940cd84805fff0e894a Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.8194F48DCC1F513B.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.ED87AF8FD1B46167.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.ED87AF8FD1B46167.idx new file mode 100755 index 0000000000000000000000000000000000000000..3e8f17b950f0c45138ee0e1a9d2efbc76b7b979e Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.ED87AF8FD1B46167.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.9B28805BAED0F92D.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.9B28805BAED0F92D.idx new file mode 100755 index 0000000000000000000000000000000000000000..c53eaf299b85d9a8d915b28eb18c05af486a5e36 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.9B28805BAED0F92D.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.7E6A2A9800DD0E7A.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.7E6A2A9800DD0E7A.idx new file mode 100755 index 0000000000000000000000000000000000000000..24f610b9917cea210297ac533ad1d645db5ca971 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.7E6A2A9800DD0E7A.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.E0C6A616E43DC9DD.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.E0C6A616E43DC9DD.idx new file mode 100755 index 0000000000000000000000000000000000000000..594327118c05270ed4b8a28a1b2a3722061d57e2 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.E0C6A616E43DC9DD.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.01988F25803E8456.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.01988F25803E8456.idx new file mode 100755 index 0000000000000000000000000000000000000000..8fc2ae409e11a6eff8767e25a8ce52fc7bca97a0 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.01988F25803E8456.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.AFEB4718FB33138B.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.AFEB4718FB33138B.idx new file mode 100755 index 0000000000000000000000000000000000000000..5841a9e42596a101a931e2131c25f2c7cfab99c7 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.AFEB4718FB33138B.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.D913F33B640C756C.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.D913F33B640C756C.idx new file mode 100755 index 0000000000000000000000000000000000000000..6749483006057b330068d73b6ea73e6f0793ed29 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.D913F33B640C756C.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.EF41093E7954CFCE.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.EF41093E7954CFCE.idx new file mode 100755 index 0000000000000000000000000000000000000000..387a7c6dedd9e90e42cadcc23e7bb7d8eccd1640 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.EF41093E7954CFCE.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.CB877B16EA5C28D0.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.CB877B16EA5C28D0.idx new file mode 100755 index 0000000000000000000000000000000000000000..acb6a31ef33a766f44b7a2d4ca376003cf335902 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.CB877B16EA5C28D0.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.7AE9E872C7D0A549.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.7AE9E872C7D0A549.idx new file mode 100755 index 0000000000000000000000000000000000000000..091633c603024fe8031a0986e0c5da80ad5ad883 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.7AE9E872C7D0A549.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.E366D05CBA9CA229.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.E366D05CBA9CA229.idx new file mode 100755 index 0000000000000000000000000000000000000000..3f37c7ad45668d8aa3aaacc45e18b4651e1033ec Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.E366D05CBA9CA229.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.F88287BB0527CE6D.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.F88287BB0527CE6D.idx new file mode 100755 index 0000000000000000000000000000000000000000..bf87b5d9096e21a392eaf0b7d24e08c594b79342 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.F88287BB0527CE6D.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.03F037ABB1A17197.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.03F037ABB1A17197.idx new file mode 100755 index 0000000000000000000000000000000000000000..1a849d12526814e5e41f14bd747fafdba3a4b5e0 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.03F037ABB1A17197.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.237A2B88B920F202.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.237A2B88B920F202.idx new file mode 100755 index 0000000000000000000000000000000000000000..3f92a5e304d63fe677f95353cc386cd07bb41e5b Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.237A2B88B920F202.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.177CE21E4E683B24.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.177CE21E4E683B24.idx new file mode 100755 index 0000000000000000000000000000000000000000..1a2c7da533fd7c39c3b7269ef7e28448e1c4d5e7 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.177CE21E4E683B24.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.FE8F7C3907743A26.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.FE8F7C3907743A26.idx new file mode 100755 index 0000000000000000000000000000000000000000..8fa891ddf496febc262bf46e2d1a1263a2838f24 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.FE8F7C3907743A26.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.1FB1A94EA79EE13A.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.1FB1A94EA79EE13A.idx new file mode 100755 index 0000000000000000000000000000000000000000..21d5975b5b2f14967e2d6964ef18fedf01b0f56e Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.1FB1A94EA79EE13A.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.6BFB81AFEEB1C1B7.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.6BFB81AFEEB1C1B7.idx new file mode 100755 index 0000000000000000000000000000000000000000..a14b620c85634329659e61b84691e11171dc8904 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.6BFB81AFEEB1C1B7.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.D3FA995D6DF314A6.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.D3FA995D6DF314A6.idx new file mode 100755 index 0000000000000000000000000000000000000000..07640e52fe382b4696693fb68f7fc9b283a55810 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.D3FA995D6DF314A6.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.D00F48772AF74F17.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.D00F48772AF74F17.idx new file mode 100755 index 0000000000000000000000000000000000000000..2e806c0d53de569bc332767a4ddb8d1cccd02bff Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.D00F48772AF74F17.idx differ diff --git a/.cache/clangd/index/device_contraction_instance.hpp.15AB9D8608FA40A0.idx b/.cache/clangd/index/device_contraction_instance.hpp.15AB9D8608FA40A0.idx new file mode 100755 index 0000000000000000000000000000000000000000..3fd0c85c74e220c80b25ac27af2c87f7b684a0f2 Binary files /dev/null and b/.cache/clangd/index/device_contraction_instance.hpp.15AB9D8608FA40A0.idx differ diff --git a/.cache/clangd/index/device_contraction_multiple_abd.hpp.D0EC339053CFF7D0.idx b/.cache/clangd/index/device_contraction_multiple_abd.hpp.D0EC339053CFF7D0.idx new file mode 100755 index 0000000000000000000000000000000000000000..f7d14160a2b20adec1f5b4dc6832d13fd2c2ca6d Binary files /dev/null and b/.cache/clangd/index/device_contraction_multiple_abd.hpp.D0EC339053CFF7D0.idx differ diff --git a/.cache/clangd/index/device_contraction_multiple_abd_xdl_cshuffle.hpp.9AA6B0ABD9EE929D.idx b/.cache/clangd/index/device_contraction_multiple_abd_xdl_cshuffle.hpp.9AA6B0ABD9EE929D.idx new file mode 100755 index 0000000000000000000000000000000000000000..b91b2180af9c1daa7724576aed60c2f5c3069fd1 Binary files /dev/null and b/.cache/clangd/index/device_contraction_multiple_abd_xdl_cshuffle.hpp.9AA6B0ABD9EE929D.idx differ diff --git a/.cache/clangd/index/device_contraction_multiple_d.hpp.CA3D08C20FA799BC.idx b/.cache/clangd/index/device_contraction_multiple_d.hpp.CA3D08C20FA799BC.idx new file mode 100755 index 0000000000000000000000000000000000000000..2682298ab8bd6bef444da950c564297113454e28 Binary files /dev/null and b/.cache/clangd/index/device_contraction_multiple_d.hpp.CA3D08C20FA799BC.idx differ diff --git a/.cache/clangd/index/device_contraction_multiple_d_xdl_cshuffle.hpp.18531F18DA84F612.idx b/.cache/clangd/index/device_contraction_multiple_d_xdl_cshuffle.hpp.18531F18DA84F612.idx new file mode 100755 index 0000000000000000000000000000000000000000..ac91121f3a05b53d952a68f4de9d108ff94498a5 Binary files /dev/null and b/.cache/clangd/index/device_contraction_multiple_d_xdl_cshuffle.hpp.18531F18DA84F612.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.634AB0569FA8301A.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.634AB0569FA8301A.idx new file mode 100755 index 0000000000000000000000000000000000000000..c01e8c3066aee8b6e9453358437b7bf230c9ab1e Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.634AB0569FA8301A.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.1229C32BCA5E8056.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.1229C32BCA5E8056.idx new file mode 100755 index 0000000000000000000000000000000000000000..02f0d483b14da7fd05ead248db17e3980c0ade73 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.1229C32BCA5E8056.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.B21239C1EC74587F.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.B21239C1EC74587F.idx new file mode 100755 index 0000000000000000000000000000000000000000..9d809e778fef44de9314ec4b866a41ba3fc949d8 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.B21239C1EC74587F.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.92168E842E069568.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.92168E842E069568.idx new file mode 100755 index 0000000000000000000000000000000000000000..176cf4a6bab2b96f3f0783d3a5d2e29dcb8fa764 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.92168E842E069568.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.8EFB8D272907AC49.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.8EFB8D272907AC49.idx new file mode 100755 index 0000000000000000000000000000000000000000..677001601472e23591b70f15b5b0a898e3280e24 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.8EFB8D272907AC49.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.4BAF3FDF22005D6C.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.4BAF3FDF22005D6C.idx new file mode 100755 index 0000000000000000000000000000000000000000..a8853f85e194f3cbc8c2cab75d59482e35267d36 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.4BAF3FDF22005D6C.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.F76E34EF68D8A65B.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.F76E34EF68D8A65B.idx new file mode 100755 index 0000000000000000000000000000000000000000..9742712adab77adc5f0c1d896116b957af42389d Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.F76E34EF68D8A65B.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.1704AA4E0EFAB142.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.1704AA4E0EFAB142.idx new file mode 100755 index 0000000000000000000000000000000000000000..adbdc998800c3efc8d10d96e800d5422f30486e3 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.1704AA4E0EFAB142.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.29A1243F9B97E124.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.29A1243F9B97E124.idx new file mode 100755 index 0000000000000000000000000000000000000000..4ce0e054473e5fedd49cd2f280dfbf4a2d250a0a Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.29A1243F9B97E124.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.020B0D68AE892602.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.020B0D68AE892602.idx new file mode 100755 index 0000000000000000000000000000000000000000..8747bfe8e9f72319c5f428fb4e72c738bf1199ea Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.020B0D68AE892602.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.A426959DB1641563.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.A426959DB1641563.idx new file mode 100755 index 0000000000000000000000000000000000000000..d219e8aff481d8bffd2b76a9748c34c4579a92ce Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.A426959DB1641563.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.1B1383BC1860443B.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.1B1383BC1860443B.idx new file mode 100755 index 0000000000000000000000000000000000000000..2948f37983b57768f927cb52015b0be32fbc5d6a Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.1B1383BC1860443B.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.C0ABEB6CD7C15E09.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.C0ABEB6CD7C15E09.idx new file mode 100755 index 0000000000000000000000000000000000000000..7e2b545b60b6ad23faf1a8653c82d40fc641c0d0 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.C0ABEB6CD7C15E09.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.76E857B0424F98D1.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.76E857B0424F98D1.idx new file mode 100755 index 0000000000000000000000000000000000000000..b1f67dcb1a7ac6f5623e339ceaff876bf86dca8a Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.76E857B0424F98D1.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.2C4EF55D06D154CA.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.2C4EF55D06D154CA.idx new file mode 100755 index 0000000000000000000000000000000000000000..3a9650f2cbe6916fe7c4a2fe8182487f21011394 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.2C4EF55D06D154CA.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.D9C825C0FBC7162C.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.D9C825C0FBC7162C.idx new file mode 100755 index 0000000000000000000000000000000000000000..7082d48740f58d513f0b555108cbab1ab6e2084e Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.D9C825C0FBC7162C.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.9ED7E55B00DD6F70.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.9ED7E55B00DD6F70.idx new file mode 100755 index 0000000000000000000000000000000000000000..9d5a95265df58c67404669153020cc21fd0a10c1 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.9ED7E55B00DD6F70.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.87288671AE2510FD.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.87288671AE2510FD.idx new file mode 100755 index 0000000000000000000000000000000000000000..f001fd5bcbedd211d2343a5fa0b27bf82881fb77 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.87288671AE2510FD.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.AA8697A6E53E704C.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.AA8697A6E53E704C.idx new file mode 100755 index 0000000000000000000000000000000000000000..b193fa0d52087876331fb2717f71523337ebef89 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.AA8697A6E53E704C.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.FC0AF91C98199AEA.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.FC0AF91C98199AEA.idx new file mode 100755 index 0000000000000000000000000000000000000000..d3e8469792c2a1643fae5eadcadcac4910b2df6f Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.FC0AF91C98199AEA.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.265462C1E86DC0F1.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.265462C1E86DC0F1.idx new file mode 100755 index 0000000000000000000000000000000000000000..c12d142b726f12317e35795695df877ab1f254c4 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.265462C1E86DC0F1.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.32EBBC25AA6C0CF0.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.32EBBC25AA6C0CF0.idx new file mode 100755 index 0000000000000000000000000000000000000000..51cf0aed40d2a3b35cbb7618d433b7a076b3eae4 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.32EBBC25AA6C0CF0.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.5C1840A2D370420C.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.5C1840A2D370420C.idx new file mode 100755 index 0000000000000000000000000000000000000000..3da36c72d9e86d101a5353e785399067a9a6ca5b Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.5C1840A2D370420C.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.05BFD4E4455B0BDF.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.05BFD4E4455B0BDF.idx new file mode 100755 index 0000000000000000000000000000000000000000..f5dcda37c76c7522abe7823614f373dfe39a14af Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.05BFD4E4455B0BDF.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.BFA2076F1038BB65.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.BFA2076F1038BB65.idx new file mode 100755 index 0000000000000000000000000000000000000000..3a0b9996ef0ab3246589f737ae537bc992daf413 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.BFA2076F1038BB65.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.16DF4E204049F20E.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.16DF4E204049F20E.idx new file mode 100755 index 0000000000000000000000000000000000000000..8732f9fe6c321f9bb43c733a3e393aa67621e5a3 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.16DF4E204049F20E.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.25AD39BA7B2B9C77.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.25AD39BA7B2B9C77.idx new file mode 100755 index 0000000000000000000000000000000000000000..01b0bfc2e855ac318ef54a91fab47fa940876ce2 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.25AD39BA7B2B9C77.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.F46067E1C6499190.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.F46067E1C6499190.idx new file mode 100755 index 0000000000000000000000000000000000000000..0473569119788e5b6246dffb8ebe14388950507f Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.F46067E1C6499190.idx differ diff --git a/.cache/clangd/index/device_contraction_utils.hpp.39AAD30D5F9C497C.idx b/.cache/clangd/index/device_contraction_utils.hpp.39AAD30D5F9C497C.idx new file mode 100755 index 0000000000000000000000000000000000000000..c2cbd1e1a1692f11627d4b6e09a3ea72f71c087e Binary files /dev/null and b/.cache/clangd/index/device_contraction_utils.hpp.39AAD30D5F9C497C.idx differ diff --git a/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp.F95715A59DC357E4.idx b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp.F95715A59DC357E4.idx new file mode 100755 index 0000000000000000000000000000000000000000..b1073d7b651f33fc29804402529ec7b1c8d61a16 Binary files /dev/null and b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp.F95715A59DC357E4.idx differ diff --git a/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp.11B1653CC880738D.idx b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp.11B1653CC880738D.idx new file mode 100755 index 0000000000000000000000000000000000000000..68cf27aaa1202628cbb30a2bfd66eb9d5111847f Binary files /dev/null and b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp.11B1653CC880738D.idx differ diff --git a/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp.1FF1E6FFC39B05F6.idx b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp.1FF1E6FFC39B05F6.idx new file mode 100755 index 0000000000000000000000000000000000000000..e07a6300a72ed47e908b2099db0c07114f793f20 Binary files /dev/null and b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp.1FF1E6FFC39B05F6.idx differ diff --git a/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp.27AC774D5173DB46.idx b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp.27AC774D5173DB46.idx new file mode 100755 index 0000000000000000000000000000000000000000..66a34ea03367382671aebf4fc8ee5b0bb88fa8e4 Binary files /dev/null and b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp.27AC774D5173DB46.idx differ diff --git a/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp.5A99B55004C02041.idx b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp.5A99B55004C02041.idx new file mode 100755 index 0000000000000000000000000000000000000000..c6b6e83788c81dc0450836fc55c958dcc29bcffb Binary files /dev/null and b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp.5A99B55004C02041.idx differ diff --git a/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.38978BB017721061.idx b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.38978BB017721061.idx new file mode 100755 index 0000000000000000000000000000000000000000..51cc782372555eec2c81853e23af121c6bba8bf6 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.38978BB017721061.idx differ diff --git a/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.015682E6D487BC89.idx b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.015682E6D487BC89.idx new file mode 100755 index 0000000000000000000000000000000000000000..c4c25ff48838a1547d6e188c5c8fb9e1ed224a53 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.015682E6D487BC89.idx differ diff --git a/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.24521662C376153B.idx b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.24521662C376153B.idx new file mode 100755 index 0000000000000000000000000000000000000000..3e2d48cf35e1680f1a12a15b0a2689e5a1deb194 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.24521662C376153B.idx differ diff --git a/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.050ACEE75FFD374F.idx b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.050ACEE75FFD374F.idx new file mode 100755 index 0000000000000000000000000000000000000000..e2fe3f761f744605aa14aa612eb86d5125cb3452 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.050ACEE75FFD374F.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp.095F638AA48F9495.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp.095F638AA48F9495.idx new file mode 100755 index 0000000000000000000000000000000000000000..d85338eaacd087c7efd5f3c17452bf355b982902 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp.095F638AA48F9495.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp.03A5D53DEE4E2511.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp.03A5D53DEE4E2511.idx new file mode 100755 index 0000000000000000000000000000000000000000..1e6eaa666d160cbcbe949c0b72be3bc51d425974 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp.03A5D53DEE4E2511.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp.FF7059ED53F4F0F2.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp.FF7059ED53F4F0F2.idx new file mode 100755 index 0000000000000000000000000000000000000000..425f948f5af155f3db87e3685d946cfd63451258 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp.FF7059ED53F4F0F2.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp.2C9D98DCC57DC34E.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp.2C9D98DCC57DC34E.idx new file mode 100755 index 0000000000000000000000000000000000000000..f8183a1fe746cb103bebc3bd426a61786aafe24e Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp.2C9D98DCC57DC34E.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp.66D8D8BBD73054E8.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp.66D8D8BBD73054E8.idx new file mode 100755 index 0000000000000000000000000000000000000000..91c205427ae80dd2e8daa7b0df533724c75590f4 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp.66D8D8BBD73054E8.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp.3FB08C96D8C7394B.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp.3FB08C96D8C7394B.idx new file mode 100755 index 0000000000000000000000000000000000000000..d5ab954aaf3b6965016f42cfbf71039d4f1cf0cc Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp.3FB08C96D8C7394B.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp.FAAE94F7CEBB58BA.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp.FAAE94F7CEBB58BA.idx new file mode 100755 index 0000000000000000000000000000000000000000..899c5aff7715c4d209497661b0b6c3c5f8ed171e Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp.FAAE94F7CEBB58BA.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.9102AFA84B67CE1B.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.9102AFA84B67CE1B.idx new file mode 100755 index 0000000000000000000000000000000000000000..2ab0852fc75691915011e47f09e38435a8753bc4 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.9102AFA84B67CE1B.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.656EAF4E8A7F404D.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.656EAF4E8A7F404D.idx new file mode 100755 index 0000000000000000000000000000000000000000..859313a10d4013b604c653674edde75f22f009b0 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.656EAF4E8A7F404D.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.793B97DB82C3A02D.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.793B97DB82C3A02D.idx new file mode 100755 index 0000000000000000000000000000000000000000..53885d0cde8de127e651aab5d203a3c9940bb908 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.793B97DB82C3A02D.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.A7E63495E5270E3E.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.A7E63495E5270E3E.idx new file mode 100755 index 0000000000000000000000000000000000000000..43cbd00430e1ef8d37e1679bd467d846aae33aac Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.A7E63495E5270E3E.idx differ diff --git a/.cache/clangd/index/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp.486913CE7740FA6B.idx b/.cache/clangd/index/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp.486913CE7740FA6B.idx new file mode 100755 index 0000000000000000000000000000000000000000..e9ac8a547ff8d3d54d81782083e32ddf11fa9daa Binary files /dev/null and b/.cache/clangd/index/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp.486913CE7740FA6B.idx differ diff --git a/.cache/clangd/index/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp.8577D499B9483812.idx b/.cache/clangd/index/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp.8577D499B9483812.idx new file mode 100755 index 0000000000000000000000000000000000000000..2fbe2fad4571cc4a576df95f0d8de595ab3153a5 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp.8577D499B9483812.idx differ diff --git a/.cache/clangd/index/device_conv2d_xdl_int8_instance.hpp.78D2B94EA941EB43.idx b/.cache/clangd/index/device_conv2d_xdl_int8_instance.hpp.78D2B94EA941EB43.idx new file mode 100755 index 0000000000000000000000000000000000000000..750891291d2a03016f3a94b15763bdc025b82b3c Binary files /dev/null and b/.cache/clangd/index/device_conv2d_xdl_int8_instance.hpp.78D2B94EA941EB43.idx differ diff --git a/.cache/clangd/index/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp.C31E228888CDC52C.idx b/.cache/clangd/index/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp.C31E228888CDC52C.idx new file mode 100755 index 0000000000000000000000000000000000000000..fe6372ca63cb2721fdcb10f26dee22a4a0f5a479 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp.C31E228888CDC52C.idx differ diff --git a/.cache/clangd/index/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp.108EC38462E7A7B4.idx b/.cache/clangd/index/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp.108EC38462E7A7B4.idx new file mode 100755 index 0000000000000000000000000000000000000000..1fdd9321f22b51a53975f51f1a9bec3d082ed20c Binary files /dev/null and b/.cache/clangd/index/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp.108EC38462E7A7B4.idx differ diff --git a/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp.7487AFBFF492DC5A.idx b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp.7487AFBFF492DC5A.idx new file mode 100755 index 0000000000000000000000000000000000000000..e8f0dd3b11cf54c84cf631099e1d9fed115af760 Binary files /dev/null and b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp.7487AFBFF492DC5A.idx differ diff --git a/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp.B17F2553F57B25A6.idx b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp.B17F2553F57B25A6.idx new file mode 100755 index 0000000000000000000000000000000000000000..ef4799b3cbd8ab35a55c67b7f4828f12dc89820f Binary files /dev/null and b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp.B17F2553F57B25A6.idx differ diff --git a/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp.D3A7F488899285AF.idx b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp.D3A7F488899285AF.idx new file mode 100755 index 0000000000000000000000000000000000000000..6cd9679430e41d44e75c1a870c248afb25b527fa Binary files /dev/null and b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp.D3A7F488899285AF.idx differ diff --git a/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp.31CAAB24531BB9D5.idx b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp.31CAAB24531BB9D5.idx new file mode 100755 index 0000000000000000000000000000000000000000..a046bf1756e594a1676937928648053bb9b98262 Binary files /dev/null and b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp.31CAAB24531BB9D5.idx differ diff --git a/.cache/clangd/index/device_conv_bwd_data.hpp.81B90E235167286C.idx b/.cache/clangd/index/device_conv_bwd_data.hpp.81B90E235167286C.idx new file mode 100755 index 0000000000000000000000000000000000000000..908388d1266dc900d7c214b317d6565545662fb1 Binary files /dev/null and b/.cache/clangd/index/device_conv_bwd_data.hpp.81B90E235167286C.idx differ diff --git a/.cache/clangd/index/device_conv_fwd.hpp.257583CFB6A8DC72.idx b/.cache/clangd/index/device_conv_fwd.hpp.257583CFB6A8DC72.idx new file mode 100755 index 0000000000000000000000000000000000000000..dbd43ba67a326e5d4aeaa6685271fbc78c79ee02 Binary files /dev/null and b/.cache/clangd/index/device_conv_fwd.hpp.257583CFB6A8DC72.idx differ diff --git a/.cache/clangd/index/device_conv_fwd_bias_activation.hpp.D4F53CD24AD4278A.idx b/.cache/clangd/index/device_conv_fwd_bias_activation.hpp.D4F53CD24AD4278A.idx new file mode 100755 index 0000000000000000000000000000000000000000..21b3eb1024d0180643cdb18e7450965f1a62b34d Binary files /dev/null and b/.cache/clangd/index/device_conv_fwd_bias_activation.hpp.D4F53CD24AD4278A.idx differ diff --git a/.cache/clangd/index/device_conv_fwd_bias_activation_add.hpp.D947A059FE31F7BB.idx b/.cache/clangd/index/device_conv_fwd_bias_activation_add.hpp.D947A059FE31F7BB.idx new file mode 100755 index 0000000000000000000000000000000000000000..a7f973a12c294027e4af0b7259908f96cac4f0ac Binary files /dev/null and b/.cache/clangd/index/device_conv_fwd_bias_activation_add.hpp.D947A059FE31F7BB.idx differ diff --git a/.cache/clangd/index/device_conv_tensor_rearrange.hpp.90268624B8539288.idx b/.cache/clangd/index/device_conv_tensor_rearrange.hpp.90268624B8539288.idx new file mode 100755 index 0000000000000000000000000000000000000000..02e20a697ce7129c9a69712aed96e148b25db11f Binary files /dev/null and b/.cache/clangd/index/device_conv_tensor_rearrange.hpp.90268624B8539288.idx differ diff --git a/.cache/clangd/index/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp.A70DE3BD61066477.idx b/.cache/clangd/index/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp.A70DE3BD61066477.idx new file mode 100755 index 0000000000000000000000000000000000000000..a086a3c36bec77231d9368d10b9611ddd59de742 Binary files /dev/null and b/.cache/clangd/index/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp.A70DE3BD61066477.idx differ diff --git a/.cache/clangd/index/device_elementwise.hpp.00198FF49AAFC017.idx b/.cache/clangd/index/device_elementwise.hpp.00198FF49AAFC017.idx new file mode 100755 index 0000000000000000000000000000000000000000..7fcb924eafebadba033a0965debada9c2fe9137b Binary files /dev/null and b/.cache/clangd/index/device_elementwise.hpp.00198FF49AAFC017.idx differ diff --git a/.cache/clangd/index/device_elementwise_2d_impl.hpp.30FD009D4EA9FEE5.idx b/.cache/clangd/index/device_elementwise_2d_impl.hpp.30FD009D4EA9FEE5.idx new file mode 100755 index 0000000000000000000000000000000000000000..8746f853461b34dad76e58cc781c222148857142 Binary files /dev/null and b/.cache/clangd/index/device_elementwise_2d_impl.hpp.30FD009D4EA9FEE5.idx differ diff --git a/.cache/clangd/index/device_elementwise_3d_impl.hpp.EA5D0D82E2445BC8.idx b/.cache/clangd/index/device_elementwise_3d_impl.hpp.EA5D0D82E2445BC8.idx new file mode 100755 index 0000000000000000000000000000000000000000..f0659dbd90dcaad4f57c9050ae412e38ab978e29 Binary files /dev/null and b/.cache/clangd/index/device_elementwise_3d_impl.hpp.EA5D0D82E2445BC8.idx differ diff --git a/.cache/clangd/index/device_elementwise_impl.hpp.54205731D4AB5FA8.idx b/.cache/clangd/index/device_elementwise_impl.hpp.54205731D4AB5FA8.idx new file mode 100755 index 0000000000000000000000000000000000000000..b152cb6b1e06738df4001a06f593cc411673c7c1 Binary files /dev/null and b/.cache/clangd/index/device_elementwise_impl.hpp.54205731D4AB5FA8.idx differ diff --git a/.cache/clangd/index/device_elementwise_normalization.hpp.2F6431A98ECDBEBB.idx b/.cache/clangd/index/device_elementwise_normalization.hpp.2F6431A98ECDBEBB.idx new file mode 100755 index 0000000000000000000000000000000000000000..98110b189d7f8f53264cefe176db63051d344318 Binary files /dev/null and b/.cache/clangd/index/device_elementwise_normalization.hpp.2F6431A98ECDBEBB.idx differ diff --git a/.cache/clangd/index/device_elementwise_normalization_f16_instance.cpp.B8CDDDF357D7D999.idx b/.cache/clangd/index/device_elementwise_normalization_f16_instance.cpp.B8CDDDF357D7D999.idx new file mode 100755 index 0000000000000000000000000000000000000000..a761d4c1dd4dbe828d78dc26a536fa1726370a89 Binary files /dev/null and b/.cache/clangd/index/device_elementwise_normalization_f16_instance.cpp.B8CDDDF357D7D999.idx differ diff --git a/.cache/clangd/index/device_elementwise_normalization_impl.hpp.D9648470B11E62DC.idx b/.cache/clangd/index/device_elementwise_normalization_impl.hpp.D9648470B11E62DC.idx new file mode 100755 index 0000000000000000000000000000000000000000..c9bc573c0b4b008a69763a0471a60487937ac6db Binary files /dev/null and b/.cache/clangd/index/device_elementwise_normalization_impl.hpp.D9648470B11E62DC.idx differ diff --git a/.cache/clangd/index/device_elementwise_scale.hpp.A8B4C8DA3A99ADD2.idx b/.cache/clangd/index/device_elementwise_scale.hpp.A8B4C8DA3A99ADD2.idx new file mode 100755 index 0000000000000000000000000000000000000000..fbd142ba7658c5685117cd4f5ce35f17bf6fe26c Binary files /dev/null and b/.cache/clangd/index/device_elementwise_scale.hpp.A8B4C8DA3A99ADD2.idx differ diff --git a/.cache/clangd/index/device_elementwise_scale_impl.hpp.792CB7A86911D632.idx b/.cache/clangd/index/device_elementwise_scale_impl.hpp.792CB7A86911D632.idx new file mode 100755 index 0000000000000000000000000000000000000000..e13d1a762625996fefd2db3eafe6d05370e164ed Binary files /dev/null and b/.cache/clangd/index/device_elementwise_scale_impl.hpp.792CB7A86911D632.idx differ diff --git a/.cache/clangd/index/device_gemm.hpp.C11C88B2080F572F.idx b/.cache/clangd/index/device_gemm.hpp.C11C88B2080F572F.idx new file mode 100755 index 0000000000000000000000000000000000000000..c28118b71176e92d460ac08d29f42cd5ce71fde7 Binary files /dev/null and b/.cache/clangd/index/device_gemm.hpp.C11C88B2080F572F.idx differ diff --git a/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.236524D86BD9A065.idx b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.236524D86BD9A065.idx new file mode 100755 index 0000000000000000000000000000000000000000..c83434fafd6eec7a97a393259688d13bf6471b8c Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.236524D86BD9A065.idx differ diff --git a/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.3F5C595E84E47B20.idx b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.3F5C595E84E47B20.idx new file mode 100755 index 0000000000000000000000000000000000000000..223bcbe35d7fd61d2da420538edcd2dc94fbfce9 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.3F5C595E84E47B20.idx differ diff --git a/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.5836A0C2B3A14787.idx b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.5836A0C2B3A14787.idx new file mode 100755 index 0000000000000000000000000000000000000000..0ad5910275629856bc288d9a1e42bd2f979c9520 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.5836A0C2B3A14787.idx differ diff --git a/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.E0256C2451B1EE93.idx b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.E0256C2451B1EE93.idx new file mode 100755 index 0000000000000000000000000000000000000000..3d16fde17d35d909cef6021623122bf04c3eacbc Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.E0256C2451B1EE93.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.EED2788EEB0B75F1.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.EED2788EEB0B75F1.idx new file mode 100755 index 0000000000000000000000000000000000000000..8b024a024dd3644e1aa4d92ed950e905fae3a81a Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.EED2788EEB0B75F1.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.F71CF786157F3658.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.F71CF786157F3658.idx new file mode 100755 index 0000000000000000000000000000000000000000..eae72dcfa4a391229a9925f3ec4cf27c9e381427 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.F71CF786157F3658.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.777F64009D887A71.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.777F64009D887A71.idx new file mode 100755 index 0000000000000000000000000000000000000000..633ba972cddaf3ede8859830955406385fbcaadc Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.777F64009D887A71.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.9B8C2015B4635A97.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.9B8C2015B4635A97.idx new file mode 100755 index 0000000000000000000000000000000000000000..e66f128f60d3035ffa026068300ed8862092a761 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.9B8C2015B4635A97.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.7475A9472170E32A.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.7475A9472170E32A.idx new file mode 100755 index 0000000000000000000000000000000000000000..43e827f7888af17d036b5cdd8e22ca639dc810f7 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.7475A9472170E32A.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5CFCD880070835B7.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5CFCD880070835B7.idx new file mode 100755 index 0000000000000000000000000000000000000000..7bf0a8cef4ce6d75b7cb9633826182a7ff77d22d Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5CFCD880070835B7.idx differ diff --git a/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.15161CADD4CF1397.idx b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.15161CADD4CF1397.idx new file mode 100755 index 0000000000000000000000000000000000000000..e68440a9f920628539d11611e2d799498a3278b1 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.15161CADD4CF1397.idx differ diff --git a/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.C192E44A2A2B2438.idx b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.C192E44A2A2B2438.idx new file mode 100755 index 0000000000000000000000000000000000000000..078d3d3f180ce04d3953c07f580f6aa056903d42 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.C192E44A2A2B2438.idx differ diff --git a/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.0E4AE68E13A6F5B4.idx b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.0E4AE68E13A6F5B4.idx new file mode 100755 index 0000000000000000000000000000000000000000..fd398e7c109e4dc7e3ea5617f56a9d9493716e92 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.0E4AE68E13A6F5B4.idx differ diff --git a/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.3B87280201C19511.idx b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.3B87280201C19511.idx new file mode 100755 index 0000000000000000000000000000000000000000..a10692619d00b03f19c3f6a8e4aa9d98a6ae4de4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.3B87280201C19511.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp.E948F8CEDD1EC4BF.idx b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp.E948F8CEDD1EC4BF.idx new file mode 100755 index 0000000000000000000000000000000000000000..15dea6fb969c60b54755de5d4798200848cb0589 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp.E948F8CEDD1EC4BF.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp.0BA2333B2B00FEDC.idx b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp.0BA2333B2B00FEDC.idx new file mode 100755 index 0000000000000000000000000000000000000000..aa062a145dc334db915273113ebb29c4d27c448a Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp.0BA2333B2B00FEDC.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp.F5D89038BFC386C8.idx b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp.F5D89038BFC386C8.idx new file mode 100755 index 0000000000000000000000000000000000000000..31e20d101993128d53bf7fbfe8dd0579b5261a71 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp.F5D89038BFC386C8.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp.935CC5A9EFAF5617.idx b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp.935CC5A9EFAF5617.idx new file mode 100755 index 0000000000000000000000000000000000000000..78a86e8962f37ed47e1ca9fa2d6c5123000e70f1 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp.935CC5A9EFAF5617.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.81A12F337280E714.idx b/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.81A12F337280E714.idx new file mode 100755 index 0000000000000000000000000000000000000000..86f594f8fad028a91df526f998d774366b8f4628 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.81A12F337280E714.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.6BB8AF1DE23C88B0.idx b/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.6BB8AF1DE23C88B0.idx new file mode 100755 index 0000000000000000000000000000000000000000..41883f77dbfddd19ef1664387cb98cd4d722bb80 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.6BB8AF1DE23C88B0.idx differ diff --git a/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.6559BA68564A041C.idx b/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.6559BA68564A041C.idx new file mode 100755 index 0000000000000000000000000000000000000000..3e88ec4e8e0efa0966e6154be3e3358bb49b50c1 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.6559BA68564A041C.idx differ diff --git a/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5F5C907654F27409.idx b/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5F5C907654F27409.idx new file mode 100755 index 0000000000000000000000000000000000000000..c79df4efaa8f41d8f5dba06114ca0989e1d4c947 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5F5C907654F27409.idx differ diff --git a/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.8D291954B92003AF.idx b/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.8D291954B92003AF.idx new file mode 100755 index 0000000000000000000000000000000000000000..766c156be9ff64e93ea7387b6739a7c1fbcdd04f Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.8D291954B92003AF.idx differ diff --git a/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.9FCCE6CADB35B88C.idx b/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.9FCCE6CADB35B88C.idx new file mode 100755 index 0000000000000000000000000000000000000000..88403271dc00401690a38970e2026606530beb80 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.9FCCE6CADB35B88C.idx differ diff --git a/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.479D1366D0F3CE30.idx b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.479D1366D0F3CE30.idx new file mode 100755 index 0000000000000000000000000000000000000000..d0bf400cb16767b0e9585be954561b788e3245e0 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.479D1366D0F3CE30.idx differ diff --git a/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.D6BBA9767E8DA592.idx b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.D6BBA9767E8DA592.idx new file mode 100755 index 0000000000000000000000000000000000000000..68a19236c4ba8f6d903a5e601a202ab8364112a5 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.D6BBA9767E8DA592.idx differ diff --git a/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.F658EA6412E57274.idx b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.F658EA6412E57274.idx new file mode 100755 index 0000000000000000000000000000000000000000..b1efd0bd05da4dc07b8dbb4065bc5f045617c4aa Binary files /dev/null and b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.F658EA6412E57274.idx differ diff --git a/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.D219505C6A3BDF13.idx b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.D219505C6A3BDF13.idx new file mode 100755 index 0000000000000000000000000000000000000000..d33170e11921395a33dc2e7435bbd12ab4944551 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.D219505C6A3BDF13.idx differ diff --git a/.cache/clangd/index/device_gemm_bias_add_reduce_xdl_cshuffle.hpp.2AAE1676CF63D1B6.idx b/.cache/clangd/index/device_gemm_bias_add_reduce_xdl_cshuffle.hpp.2AAE1676CF63D1B6.idx new file mode 100755 index 0000000000000000000000000000000000000000..2070d7f5428a3a602b251d2e136c51c200675e88 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bias_add_reduce_xdl_cshuffle.hpp.2AAE1676CF63D1B6.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp.4E5648F8B76EED28.idx b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp.4E5648F8B76EED28.idx new file mode 100755 index 0000000000000000000000000000000000000000..ba1af861e39a360a3f4b229d7e73fa9e8978139b Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp.4E5648F8B76EED28.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp.51C1492BB7273708.idx b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp.51C1492BB7273708.idx new file mode 100755 index 0000000000000000000000000000000000000000..bf1f37f91651cb4e4d6a9ac9116ee2903d94038b Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp.51C1492BB7273708.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp.89C496A745057432.idx b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp.89C496A745057432.idx new file mode 100755 index 0000000000000000000000000000000000000000..87e5bdee019d15fea17f4f9dd49a0ad678438ed5 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp.89C496A745057432.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp.DA4EFE2E10A56944.idx b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp.DA4EFE2E10A56944.idx new file mode 100755 index 0000000000000000000000000000000000000000..fd5d15c05893e9d71b07b8ec5b0938932ad471f0 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp.DA4EFE2E10A56944.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.80BE80F117BA699A.idx b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.80BE80F117BA699A.idx new file mode 100755 index 0000000000000000000000000000000000000000..362f06a72f6bfd4683b7e0d2e73b7a605fd504eb Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.80BE80F117BA699A.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.E76894F842B8152E.idx b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.E76894F842B8152E.idx new file mode 100755 index 0000000000000000000000000000000000000000..d7f1457738c777f154da0dc994389bad50eec7ab Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.E76894F842B8152E.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.741D3DF3032F48B6.idx b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.741D3DF3032F48B6.idx new file mode 100755 index 0000000000000000000000000000000000000000..3d85705d8b2b2daf4e3c9e25ad05d13031664130 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.741D3DF3032F48B6.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.E3C0C7076B1A5420.idx b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.E3C0C7076B1A5420.idx new file mode 100755 index 0000000000000000000000000000000000000000..848d751cc49c17637634a137e1a2867c1add9de3 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.E3C0C7076B1A5420.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp.hpp.86A83F148A978F3B.idx b/.cache/clangd/index/device_gemm_dpp.hpp.86A83F148A978F3B.idx new file mode 100755 index 0000000000000000000000000000000000000000..7438ad4a5fc8ab507f991b277118c285f28eed7e Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp.hpp.86A83F148A978F3B.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp.AC3765D99685AA83.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp.AC3765D99685AA83.idx new file mode 100755 index 0000000000000000000000000000000000000000..ac690f6bfbc7d0dce10b6867f9b308a4ff1ef47d Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp.AC3765D99685AA83.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp.BE2169842E629D5F.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp.BE2169842E629D5F.idx new file mode 100755 index 0000000000000000000000000000000000000000..1e2c6fcd3645574abcd9f332d4f4df7cf996464f Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp.BE2169842E629D5F.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp.C525CCAE82B88202.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp.C525CCAE82B88202.idx new file mode 100755 index 0000000000000000000000000000000000000000..68c67561f60fa0fcb89cca439de31ef4c0936274 Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp.C525CCAE82B88202.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp.E70AE989E444006E.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp.E70AE989E444006E.idx new file mode 100755 index 0000000000000000000000000000000000000000..9ee5d5b508ecd2334d7e80ca9fd21d81eab4d2e7 Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp.E70AE989E444006E.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp.F48BB29B1F2C3D07.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp.F48BB29B1F2C3D07.idx new file mode 100755 index 0000000000000000000000000000000000000000..938419e3cc1f6f99011aa611cd737447743a37a2 Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp.F48BB29B1F2C3D07.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.9E7DF32B7E3B06F9.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.9E7DF32B7E3B06F9.idx new file mode 100755 index 0000000000000000000000000000000000000000..f513997e262dfbd97fbee257dcd3e3f54d2cf870 Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.9E7DF32B7E3B06F9.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp.8E876A3BCC68461F.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp.8E876A3BCC68461F.idx new file mode 100755 index 0000000000000000000000000000000000000000..23c19d8bc9fc300947a93b9facb0eb9cccfb3897 Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp.8E876A3BCC68461F.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F358D66C9DE4EC29.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F358D66C9DE4EC29.idx new file mode 100755 index 0000000000000000000000000000000000000000..f6f4efbf17f9b8a3c1023b7a8c040173039f13bd Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F358D66C9DE4EC29.idx differ diff --git a/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.B5661BE447C6D950.idx b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.B5661BE447C6D950.idx new file mode 100755 index 0000000000000000000000000000000000000000..6fe332601f37bdc3ee3f8af254d901cf23966685 Binary files /dev/null and b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.B5661BE447C6D950.idx differ diff --git a/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.E716A8264A34B5D8.idx b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.E716A8264A34B5D8.idx new file mode 100755 index 0000000000000000000000000000000000000000..3bb46e607be03eb1d208f455637e706a565c55c9 Binary files /dev/null and b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.E716A8264A34B5D8.idx differ diff --git a/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.B8E2B7EF254C9C8D.idx b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.B8E2B7EF254C9C8D.idx new file mode 100755 index 0000000000000000000000000000000000000000..2cb73641125c8aec11a0fe90fd0960d0f6c4aa50 Binary files /dev/null and b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.B8E2B7EF254C9C8D.idx differ diff --git a/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.4104F408858A9B41.idx b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.4104F408858A9B41.idx new file mode 100755 index 0000000000000000000000000000000000000000..fdd230bbd1ae39b95fc2d31ac73fecb2643f2a4a Binary files /dev/null and b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.4104F408858A9B41.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_abd.hpp.01CABF5AC7B8D71E.idx b/.cache/clangd/index/device_gemm_multiple_abd.hpp.01CABF5AC7B8D71E.idx new file mode 100755 index 0000000000000000000000000000000000000000..9979312f957b11dd11b84282cb7eb5b50271dd84 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_abd.hpp.01CABF5AC7B8D71E.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_abd_xdl_cshuffle.hpp.6B55B3AF2C69EA61.idx b/.cache/clangd/index/device_gemm_multiple_abd_xdl_cshuffle.hpp.6B55B3AF2C69EA61.idx new file mode 100755 index 0000000000000000000000000000000000000000..a73f60982dcf028b6470e9cd325e65bd9f9ecc8c Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_abd_xdl_cshuffle.hpp.6B55B3AF2C69EA61.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d.hpp.A369BE7D23825A77.idx b/.cache/clangd/index/device_gemm_multiple_d.hpp.A369BE7D23825A77.idx new file mode 100755 index 0000000000000000000000000000000000000000..980ec748393331f880145b7d9b9711fee50405c2 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d.hpp.A369BE7D23825A77.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_layernorm.hpp.1996D4382B7902A4.idx b/.cache/clangd/index/device_gemm_multiple_d_layernorm.hpp.1996D4382B7902A4.idx new file mode 100755 index 0000000000000000000000000000000000000000..774fb1235d617418ee58e02f783c34863a3d8df2 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_layernorm.hpp.1996D4382B7902A4.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp.6897EB3658A7024F.idx b/.cache/clangd/index/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp.6897EB3658A7024F.idx new file mode 100755 index 0000000000000000000000000000000000000000..74c94420ea2d67967b98d7012aeca6d59e4f8855 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp.6897EB3658A7024F.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_multiple_r.hpp.D51C7B3444D17F61.idx b/.cache/clangd/index/device_gemm_multiple_d_multiple_r.hpp.D51C7B3444D17F61.idx new file mode 100755 index 0000000000000000000000000000000000000000..3b1ebace70bafe7d11769637bac2982f4a75d968 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_multiple_r.hpp.D51C7B3444D17F61.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.76B6E1492C6F98CC.idx b/.cache/clangd/index/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.76B6E1492C6F98CC.idx new file mode 100755 index 0000000000000000000000000000000000000000..28dd35d2d37073ccd155bcde89ee5a40f5a7ce9a Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.76B6E1492C6F98CC.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_wmma_cshuffle.hpp.5BE3B8B060B3F7DD.idx b/.cache/clangd/index/device_gemm_multiple_d_wmma_cshuffle.hpp.5BE3B8B060B3F7DD.idx new file mode 100755 index 0000000000000000000000000000000000000000..1960392bb3681072584d40c109a23370ae572dbe Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_wmma_cshuffle.hpp.5BE3B8B060B3F7DD.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle.hpp.A234525D7676EE3C.idx b/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle.hpp.A234525D7676EE3C.idx new file mode 100755 index 0000000000000000000000000000000000000000..a073ccd3090bcbe28bde3809d443470a27fceb98 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle.hpp.A234525D7676EE3C.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.D18AD937E7A47E2A.idx b/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.D18AD937E7A47E2A.idx new file mode 100755 index 0000000000000000000000000000000000000000..d5c706af3c8c71e6542a4562a3908f5611f03912 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.D18AD937E7A47E2A.idx differ diff --git a/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.46B874AA40214074.idx b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.46B874AA40214074.idx new file mode 100755 index 0000000000000000000000000000000000000000..34046311ab132c871f0dcb71cb0398a815a1a8a9 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.46B874AA40214074.idx differ diff --git a/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.0EE0667EFBB4A693.idx b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.0EE0667EFBB4A693.idx new file mode 100755 index 0000000000000000000000000000000000000000..2733245b385853455f5ddb8938427ba095e2f356 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.0EE0667EFBB4A693.idx differ diff --git a/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp.4B2A61CBEBB69C46.idx b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp.4B2A61CBEBB69C46.idx new file mode 100755 index 0000000000000000000000000000000000000000..a45cc88f7db30aa614ee35fb86fe0be7f862780d Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp.4B2A61CBEBB69C46.idx differ diff --git a/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp.E45F1434A64DD3E5.idx b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp.E45F1434A64DD3E5.idx new file mode 100755 index 0000000000000000000000000000000000000000..ce3d33aa9b6207cc5906e9a802d5d4bff171510b Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp.E45F1434A64DD3E5.idx differ diff --git a/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp.35BB7F78B886DD68.idx b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp.35BB7F78B886DD68.idx new file mode 100755 index 0000000000000000000000000000000000000000..6cb217abda50464b900dab0d63363dcc0f212b1d Binary files /dev/null and b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp.35BB7F78B886DD68.idx differ diff --git a/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.6F38C058A13FF1D3.idx b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.6F38C058A13FF1D3.idx new file mode 100755 index 0000000000000000000000000000000000000000..85b9cc6bb74a225c1257fbfb84e737374efbfd86 Binary files /dev/null and b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.6F38C058A13FF1D3.idx differ diff --git a/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.0B3E6146A79A7808.idx b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.0B3E6146A79A7808.idx new file mode 100755 index 0000000000000000000000000000000000000000..5f051e6ab07dfd58ea370ebffc7ee8a0ace5ccc5 Binary files /dev/null and b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.0B3E6146A79A7808.idx differ diff --git a/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.487F7DBAC716F5E6.idx b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.487F7DBAC716F5E6.idx new file mode 100755 index 0000000000000000000000000000000000000000..f12a5373a740994a81c8ce32efe29379575ba2cf Binary files /dev/null and b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.487F7DBAC716F5E6.idx differ diff --git a/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.0D311D4C9F7E0828.idx b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.0D311D4C9F7E0828.idx new file mode 100755 index 0000000000000000000000000000000000000000..846dadfcc816df2d38ad7673fb0b0e6c16f82f9b Binary files /dev/null and b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.0D311D4C9F7E0828.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce.hpp.536083621BE180B4.idx b/.cache/clangd/index/device_gemm_reduce.hpp.536083621BE180B4.idx new file mode 100755 index 0000000000000000000000000000000000000000..02cac28c2798749ce09346313dab94531493d932 Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce.hpp.536083621BE180B4.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle.hpp.E3AE2B4768599A5C.idx b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle.hpp.E3AE2B4768599A5C.idx new file mode 100755 index 0000000000000000000000000000000000000000..3e2ccc81a17db29e861ccde08a338a20ec8041c5 Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle.hpp.E3AE2B4768599A5C.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.7921F720A3C738B2.idx b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.7921F720A3C738B2.idx new file mode 100755 index 0000000000000000000000000000000000000000..fc1676cf58446017ac2f541e7f0d4459453352d9 Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.7921F720A3C738B2.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.67A4E982DE011DE5.idx b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.67A4E982DE011DE5.idx new file mode 100755 index 0000000000000000000000000000000000000000..1bdf3a987a5214c2d01dd38159d7ab45f2619a99 Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.67A4E982DE011DE5.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.18986AD46051D542.idx b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.18986AD46051D542.idx new file mode 100755 index 0000000000000000000000000000000000000000..f28c28dda15dee0bd72394f2b89a008c6e27aeaa Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.18986AD46051D542.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.93B77417ED344C2F.idx b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.93B77417ED344C2F.idx new file mode 100755 index 0000000000000000000000000000000000000000..3f30237b4de075b6415ea25e08e7e1cbe004ea4c Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.93B77417ED344C2F.idx differ diff --git a/.cache/clangd/index/device_gemm_splitk.hpp.CFD1B5B13FAD53CD.idx b/.cache/clangd/index/device_gemm_splitk.hpp.CFD1B5B13FAD53CD.idx new file mode 100755 index 0000000000000000000000000000000000000000..12132378d9f7fc59de394294a1a5de8b0c3f351f Binary files /dev/null and b/.cache/clangd/index/device_gemm_splitk.hpp.CFD1B5B13FAD53CD.idx differ diff --git a/.cache/clangd/index/device_gemm_streamk.hpp.75A52E82DB5C165B.idx b/.cache/clangd/index/device_gemm_streamk.hpp.75A52E82DB5C165B.idx new file mode 100755 index 0000000000000000000000000000000000000000..6b7f740feed43d677b42223d33753e16dd1658ac Binary files /dev/null and b/.cache/clangd/index/device_gemm_streamk.hpp.75A52E82DB5C165B.idx differ diff --git a/.cache/clangd/index/device_gemm_wmma.hpp.32E8352ACBB1259F.idx b/.cache/clangd/index/device_gemm_wmma.hpp.32E8352ACBB1259F.idx new file mode 100755 index 0000000000000000000000000000000000000000..24298ab10f4de50d24b0a8124454b61de197ba6e Binary files /dev/null and b/.cache/clangd/index/device_gemm_wmma.hpp.32E8352ACBB1259F.idx differ diff --git a/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_kn_mn_instance.cpp.48B71A3DBBF3DF70.idx b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_kn_mn_instance.cpp.48B71A3DBBF3DF70.idx new file mode 100755 index 0000000000000000000000000000000000000000..faf863067fad3873b4679ea3904ae6131a328f99 Binary files /dev/null and b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_kn_mn_instance.cpp.48B71A3DBBF3DF70.idx differ diff --git a/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_nk_mn_instance.cpp.9F95D8CF713B4DFB.idx b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_nk_mn_instance.cpp.9F95D8CF713B4DFB.idx new file mode 100755 index 0000000000000000000000000000000000000000..03f25b73ac9324239f13bfb318e8b90fcc96eb8a Binary files /dev/null and b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_nk_mn_instance.cpp.9F95D8CF713B4DFB.idx differ diff --git a/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_kn_mn_instance.cpp.4656D94005983107.idx b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_kn_mn_instance.cpp.4656D94005983107.idx new file mode 100755 index 0000000000000000000000000000000000000000..39e7f7f4009ab35944d2097113c02780b5313c02 Binary files /dev/null and b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_kn_mn_instance.cpp.4656D94005983107.idx differ diff --git a/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_nk_mn_instance.cpp.92DF213C04CE83E0.idx b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_nk_mn_instance.cpp.92DF213C04CE83E0.idx new file mode 100755 index 0000000000000000000000000000000000000000..117769d3b0c447dc0771d17081281f4e5b6feb01 Binary files /dev/null and b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_nk_mn_instance.cpp.92DF213C04CE83E0.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl.hpp.8498EA503EEB08EE.idx b/.cache/clangd/index/device_gemm_xdl.hpp.8498EA503EEB08EE.idx new file mode 100755 index 0000000000000000000000000000000000000000..8f6ffd0d2bcfb02fd0bb4484ee937a83a308bc92 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl.hpp.8498EA503EEB08EE.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp.F73590DBD2CB1EF5.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp.F73590DBD2CB1EF5.idx new file mode 100755 index 0000000000000000000000000000000000000000..f2b198cd46fb8fb21720ed060f296f4f46640b98 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp.F73590DBD2CB1EF5.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp.544964C8084D9197.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp.544964C8084D9197.idx new file mode 100755 index 0000000000000000000000000000000000000000..a56065d3cdb248bcfe33c97d8a5a423949490d89 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp.544964C8084D9197.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp.6E89B9B9359CE514.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp.6E89B9B9359CE514.idx new file mode 100755 index 0000000000000000000000000000000000000000..c34436ac5b37917dda93af7a83be1caf3e6de8d5 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp.6E89B9B9359CE514.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp.2346E4E4EAE3DC7F.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp.2346E4E4EAE3DC7F.idx new file mode 100755 index 0000000000000000000000000000000000000000..aa88835956651e2605c811b9cc57c76a08c78491 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp.2346E4E4EAE3DC7F.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp.DCA2A91E3CF88135.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp.DCA2A91E3CF88135.idx new file mode 100755 index 0000000000000000000000000000000000000000..46dd055c5ef2a0e9f4be5851d2699aa0f967ee4d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp.DCA2A91E3CF88135.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.52C4510F756E0106.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.52C4510F756E0106.idx new file mode 100755 index 0000000000000000000000000000000000000000..419aa484c41b08ed8c52cae169384d0c80cbca7c Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.52C4510F756E0106.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.11CF3D24C90FB35A.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.11CF3D24C90FB35A.idx new file mode 100755 index 0000000000000000000000000000000000000000..01a0c17c219ecdad25bccf7df7139444060263be Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.11CF3D24C90FB35A.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.3180F054887BBB56.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.3180F054887BBB56.idx new file mode 100755 index 0000000000000000000000000000000000000000..2f8932c498f2712eca44b87af074e897bc49ea10 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.3180F054887BBB56.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.CA124F6888DE0DFE.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.CA124F6888DE0DFE.idx new file mode 100755 index 0000000000000000000000000000000000000000..c5e074c84a9cde062c34530787995b7d0f739f37 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.CA124F6888DE0DFE.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp.5B0B95461D582E5E.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp.5B0B95461D582E5E.idx new file mode 100755 index 0000000000000000000000000000000000000000..d39e9b15d33150190da2ccb56cd9ed251aeeb1f7 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp.5B0B95461D582E5E.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp.144394AC7C436910.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp.144394AC7C436910.idx new file mode 100755 index 0000000000000000000000000000000000000000..2569757086bb723eea448e9bf7efb8ee62de8439 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp.144394AC7C436910.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp.9C26807545ECBC7F.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp.9C26807545ECBC7F.idx new file mode 100755 index 0000000000000000000000000000000000000000..d101717ed41ac6ea03e77a507caae8b1ce6707ba Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp.9C26807545ECBC7F.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp.48B6F9C0E69BB63A.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp.48B6F9C0E69BB63A.idx new file mode 100755 index 0000000000000000000000000000000000000000..11f9ac7d2dd9f59d49ecc232f5ea6631d4519716 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp.48B6F9C0E69BB63A.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp.FF4791DC4DC38C18.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp.FF4791DC4DC38C18.idx new file mode 100755 index 0000000000000000000000000000000000000000..4d96fd6b5c46a3c7b4573e20298ac3fd20b85049 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp.FF4791DC4DC38C18.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp.6D261DF3AD15EA25.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp.6D261DF3AD15EA25.idx new file mode 100755 index 0000000000000000000000000000000000000000..c7d5f8dbae2f450cd2acf7650ab2c8f929150c89 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp.6D261DF3AD15EA25.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp.FA9B7FC9807074DA.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp.FA9B7FC9807074DA.idx new file mode 100755 index 0000000000000000000000000000000000000000..4cac5b37b1c9b720faa0571244b2002ac7a8e74d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp.FA9B7FC9807074DA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_instance.hpp.E10E580D456628E1.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_instance.hpp.E10E580D456628E1.idx new file mode 100755 index 0000000000000000000000000000000000000000..eced73fe4b69d88afc9fdeb520ddd1ab9c0360a8 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_instance.hpp.E10E580D456628E1.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp.030C33F297B655BC.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp.030C33F297B655BC.idx new file mode 100755 index 0000000000000000000000000000000000000000..125ac5b2bd4460db2b900bc8e66b3aa4c31dc4bb Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp.030C33F297B655BC.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_instance.hpp.C8A37285FABA85B1.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_instance.hpp.C8A37285FABA85B1.idx new file mode 100755 index 0000000000000000000000000000000000000000..56a26dd6e17d52bb1abd3fcbed0efc2e37da679e Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_instance.hpp.C8A37285FABA85B1.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp.1333260C2CA69DB3.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp.1333260C2CA69DB3.idx new file mode 100755 index 0000000000000000000000000000000000000000..33cce0cb45afccbe47e01a699b1410f2bef7f5c8 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp.1333260C2CA69DB3.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp.B5F4680BE43A28CE.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp.B5F4680BE43A28CE.idx new file mode 100755 index 0000000000000000000000000000000000000000..278de03dbea57872e1d7a41dece05f3eedb294fe Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp.B5F4680BE43A28CE.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp.4093668F9B2A46D0.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp.4093668F9B2A46D0.idx new file mode 100755 index 0000000000000000000000000000000000000000..fbf55208db2c11f36383fdf375aa3b551ab94dac Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp.4093668F9B2A46D0.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_instance.hpp.A50E70F0D93B6F8E.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_instance.hpp.A50E70F0D93B6F8E.idx new file mode 100755 index 0000000000000000000000000000000000000000..17e95b747ee6e19632a73e87eb8681471994773f Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_instance.hpp.A50E70F0D93B6F8E.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp.74A0C6504550F5D4.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp.74A0C6504550F5D4.idx new file mode 100755 index 0000000000000000000000000000000000000000..a3db28d1a6184af53e06a3d68deeb96eaaa83a6e Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp.74A0C6504550F5D4.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp.A977355E082D2CA2.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp.A977355E082D2CA2.idx new file mode 100755 index 0000000000000000000000000000000000000000..97a5cf453ca46f2ed423bcebac2f093c803476ef Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp.A977355E082D2CA2.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.2559A4A420F11829.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.2559A4A420F11829.idx new file mode 100755 index 0000000000000000000000000000000000000000..6bb143ea472cae935041e38849d25695217489f1 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.2559A4A420F11829.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.513C881E10886D27.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.513C881E10886D27.idx new file mode 100755 index 0000000000000000000000000000000000000000..60e3075fbf0455b4c1c4b9b21b1de5bb34beb877 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.513C881E10886D27.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.43610B5AFBA5C7DA.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.43610B5AFBA5C7DA.idx new file mode 100755 index 0000000000000000000000000000000000000000..d49faec7331809623634b19965a2354bd780d98e Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.43610B5AFBA5C7DA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.448FFB8EF559CCD0.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.448FFB8EF559CCD0.idx new file mode 100755 index 0000000000000000000000000000000000000000..17365c980235aece2c3bb925b54a4bb89f49b419 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.448FFB8EF559CCD0.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6CE81023C2E130B0.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6CE81023C2E130B0.idx new file mode 100755 index 0000000000000000000000000000000000000000..ee36d671d02979ab9992e7ef39bfc95b80124862 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6CE81023C2E130B0.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_kn_mn_instance.cpp.62E7711AB97BA061.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_kn_mn_instance.cpp.62E7711AB97BA061.idx new file mode 100755 index 0000000000000000000000000000000000000000..7dd5f5275e7c4c60798e98a12663a494347726a8 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_kn_mn_instance.cpp.62E7711AB97BA061.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_nk_mn_instance.cpp.FC50F49FB1ACD418.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_nk_mn_instance.cpp.FC50F49FB1ACD418.idx new file mode 100755 index 0000000000000000000000000000000000000000..c5d0c0725e97bb84ffb74b510c4f2e006c187bdf Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_nk_mn_instance.cpp.FC50F49FB1ACD418.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_kn_mn_instance.cpp.29C6735FE1834089.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_kn_mn_instance.cpp.29C6735FE1834089.idx new file mode 100755 index 0000000000000000000000000000000000000000..511d1a6ea3989ec5441ec5ed51d83831254f4c06 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_kn_mn_instance.cpp.29C6735FE1834089.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_nk_mn_instance.cpp.2E316B49437298E2.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_nk_mn_instance.cpp.2E316B49437298E2.idx new file mode 100755 index 0000000000000000000000000000000000000000..8e13281a5c1355c7bd1195e441292b4b01fe1f42 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_nk_mn_instance.cpp.2E316B49437298E2.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_cshuffle.hpp.54159ABC735BA0BB.idx b/.cache/clangd/index/device_gemm_xdl_cshuffle.hpp.54159ABC735BA0BB.idx new file mode 100755 index 0000000000000000000000000000000000000000..13e853f1b77583d78ca7525219c51a3a6165b8d4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_cshuffle.hpp.54159ABC735BA0BB.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_cshuffle_lds_direct_load.hpp.C2FB5C5C6E8BC96E.idx b/.cache/clangd/index/device_gemm_xdl_cshuffle_lds_direct_load.hpp.C2FB5C5C6E8BC96E.idx new file mode 100755 index 0000000000000000000000000000000000000000..70dc378421b2366b891b68ee03b38132e5dc2563 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_cshuffle_lds_direct_load.hpp.C2FB5C5C6E8BC96E.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_cshuffle_v2.hpp.6D300E1E67F0C8FA.idx b/.cache/clangd/index/device_gemm_xdl_cshuffle_v2.hpp.6D300E1E67F0C8FA.idx new file mode 100755 index 0000000000000000000000000000000000000000..8328c74ba6bf5d2d52f277023873192a2924f33c Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_cshuffle_v2.hpp.6D300E1E67F0C8FA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp.9F68ED656E988D30.idx b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp.9F68ED656E988D30.idx new file mode 100755 index 0000000000000000000000000000000000000000..e4ac81032af97d39490bd7f2205d70e60e036841 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp.9F68ED656E988D30.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp.FA79EAE16FB6F44E.idx b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp.FA79EAE16FB6F44E.idx new file mode 100755 index 0000000000000000000000000000000000000000..d96f25739d67f5e06322dce3af6a979305233d6d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp.FA79EAE16FB6F44E.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp.F19AC2A6D8DD58E6.idx b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp.F19AC2A6D8DD58E6.idx new file mode 100755 index 0000000000000000000000000000000000000000..e9328383f7dad384b1177e379093f88e733ccdc5 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp.F19AC2A6D8DD58E6.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp.478DBC5276E71817.idx b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp.478DBC5276E71817.idx new file mode 100755 index 0000000000000000000000000000000000000000..e00d3f16d3f890f722752bc08eaf49d1f9221531 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp.478DBC5276E71817.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp.1759B855C9E56265.idx b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp.1759B855C9E56265.idx new file mode 100755 index 0000000000000000000000000000000000000000..659176bd2841687b03dbdf8cad434debf694f7db Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp.1759B855C9E56265.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp.9CBE0B752A41740B.idx b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp.9CBE0B752A41740B.idx new file mode 100755 index 0000000000000000000000000000000000000000..b66c6dbde0ce582910746811b7331a1a8ad5c8d4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp.9CBE0B752A41740B.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp.2A9A70463B0B1D69.idx b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp.2A9A70463B0B1D69.idx new file mode 100755 index 0000000000000000000000000000000000000000..9cec5018998756fe410cd3db2cfe729b94f4e271 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp.2A9A70463B0B1D69.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp.E46BEEC813D33535.idx b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp.E46BEEC813D33535.idx new file mode 100755 index 0000000000000000000000000000000000000000..6a62f99a2dea918b00b1978f8b0b87aae1a700d7 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp.E46BEEC813D33535.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_layernorm_cshuffle.hpp.7536F92E23B638A6.idx b/.cache/clangd/index/device_gemm_xdl_layernorm_cshuffle.hpp.7536F92E23B638A6.idx new file mode 100755 index 0000000000000000000000000000000000000000..f7af9a00b282635f3122d5efc564399367326839 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_layernorm_cshuffle.hpp.7536F92E23B638A6.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_skip_b_lds.hpp.31130252724E07CF.idx b/.cache/clangd/index/device_gemm_xdl_skip_b_lds.hpp.31130252724E07CF.idx new file mode 100755 index 0000000000000000000000000000000000000000..278f35dd60a2b2077b2ca304d2818718f8259f72 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_skip_b_lds.hpp.31130252724E07CF.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle.hpp.D2F306B1868E426C.idx b/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle.hpp.D2F306B1868E426C.idx new file mode 100755 index 0000000000000000000000000000000000000000..63246b85bcc45fef174f284d265ef684639190fc Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle.hpp.D2F306B1868E426C.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp.C1DEA5CF04C380FB.idx b/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp.C1DEA5CF04C380FB.idx new file mode 100755 index 0000000000000000000000000000000000000000..a3fc917b14598937a0cf1c34c6a332944cd73cfb Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp.C1DEA5CF04C380FB.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp.5C4328570228C900.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp.5C4328570228C900.idx new file mode 100755 index 0000000000000000000000000000000000000000..68b33656fc29087166b6b8a81007c40cd1bbb61d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp.5C4328570228C900.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp.8E11BCA1E8B40C54.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp.8E11BCA1E8B40C54.idx new file mode 100755 index 0000000000000000000000000000000000000000..b78f114d23858bc5848e6955172dc21d90fe8773 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp.8E11BCA1E8B40C54.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp.02602D130B49C5E9.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp.02602D130B49C5E9.idx new file mode 100755 index 0000000000000000000000000000000000000000..8f8579630d0979a01b4d3e65e79550f883d734a8 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp.02602D130B49C5E9.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp.76573A416742936A.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp.76573A416742936A.idx new file mode 100755 index 0000000000000000000000000000000000000000..043cf81f0e04a96635475ff1c85b4517ec8dca1a Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp.76573A416742936A.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp.80C5AB0FEC7DFC63.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp.80C5AB0FEC7DFC63.idx new file mode 100755 index 0000000000000000000000000000000000000000..4b8fc1f16d396374b8f737f6ee3e1b17ff17ece7 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp.80C5AB0FEC7DFC63.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp.073EE92A5EAA844A.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp.073EE92A5EAA844A.idx new file mode 100755 index 0000000000000000000000000000000000000000..dec47fbf6d43f255e9d3ef3775e57ad06a22da79 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp.073EE92A5EAA844A.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_instance.cpp.98C08C6647949567.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_instance.cpp.98C08C6647949567.idx new file mode 100755 index 0000000000000000000000000000000000000000..abbe2d584a72c50ff74586cc9c7a6abfc501db15 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_instance.cpp.98C08C6647949567.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.436428D0A84C0AFE.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.436428D0A84C0AFE.idx new file mode 100755 index 0000000000000000000000000000000000000000..603cd1b65849a7b9512670473a50f765919fdc03 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.436428D0A84C0AFE.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_irregular_instance.cpp.5798AD2D53C823E4.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_irregular_instance.cpp.5798AD2D53C823E4.idx new file mode 100755 index 0000000000000000000000000000000000000000..009a4c3baa1ac80b73057cc10e0eae2d7d5ccce5 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_irregular_instance.cpp.5798AD2D53C823E4.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_irregular_instance.cpp.34C01D1CCA40C5F9.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_irregular_instance.cpp.34C01D1CCA40C5F9.idx new file mode 100755 index 0000000000000000000000000000000000000000..45ff2bc4b0b7cb6fab039570c406f97bfbe0941b Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_irregular_instance.cpp.34C01D1CCA40C5F9.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_instance.cpp.2C6EF29B32E3350B.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_instance.cpp.2C6EF29B32E3350B.idx new file mode 100755 index 0000000000000000000000000000000000000000..2596bf20600584be9fe3bbbd30f53f62404e55dd Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_instance.cpp.2C6EF29B32E3350B.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_irregular_instance.cpp.483FB0E67DC82A12.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_irregular_instance.cpp.483FB0E67DC82A12.idx new file mode 100755 index 0000000000000000000000000000000000000000..1b744d0e9db8f251d32ee19c4a2aad35798d9eb7 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_irregular_instance.cpp.483FB0E67DC82A12.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_instance.cpp.04904E13881A008D.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_instance.cpp.04904E13881A008D.idx new file mode 100755 index 0000000000000000000000000000000000000000..4dd2a49c9613617371a2c75d66213f2a171c5bec Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_instance.cpp.04904E13881A008D.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_instance.cpp.BF7C969BEF21A812.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_instance.cpp.BF7C969BEF21A812.idx new file mode 100755 index 0000000000000000000000000000000000000000..7ccdbc93cc01c32d4ab0a95e1c441b4468156529 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_instance.cpp.BF7C969BEF21A812.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_irregular_instance.cpp.11676556038472A5.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_irregular_instance.cpp.11676556038472A5.idx new file mode 100755 index 0000000000000000000000000000000000000000..50edc4f7f25a90def58f072e66e103bcd3d50e15 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_irregular_instance.cpp.11676556038472A5.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_irregular_instance.cpp.E685A8514CD614D5.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_irregular_instance.cpp.E685A8514CD614D5.idx new file mode 100755 index 0000000000000000000000000000000000000000..52d9d83c8271037eea8ebd856e946ef15d765075 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_irregular_instance.cpp.E685A8514CD614D5.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_instance.cpp.02D5EB3BF76A9EDC.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_instance.cpp.02D5EB3BF76A9EDC.idx new file mode 100755 index 0000000000000000000000000000000000000000..276951e0f095a24d8ef88e890e28421d48584b51 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_instance.cpp.02D5EB3BF76A9EDC.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_irregular_instance.cpp.235E5C5B653C03E0.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_irregular_instance.cpp.235E5C5B653C03E0.idx new file mode 100755 index 0000000000000000000000000000000000000000..794a3c1b0a72a82c64268f9541320b55e6ca1abe Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_irregular_instance.cpp.235E5C5B653C03E0.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp.37CD0D23BDED400B.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp.37CD0D23BDED400B.idx new file mode 100755 index 0000000000000000000000000000000000000000..d065b34c36dc44fbd03ac908fa558a9f271810cc Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp.37CD0D23BDED400B.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp.2924213EB866E058.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp.2924213EB866E058.idx new file mode 100755 index 0000000000000000000000000000000000000000..5f129eb71f573834673897c4ea6943a6dc888bc1 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp.2924213EB866E058.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_irregular_instance.cpp.3FB0D5B09C446777.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_irregular_instance.cpp.3FB0D5B09C446777.idx new file mode 100755 index 0000000000000000000000000000000000000000..1063932150e1255b5849ee8c9dfc2c408e2dc50a Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_irregular_instance.cpp.3FB0D5B09C446777.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_instance.cpp.7D14F328651204D1.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_instance.cpp.7D14F328651204D1.idx new file mode 100755 index 0000000000000000000000000000000000000000..440b0cd74f175e5360a62d83b8193ed1e84b23f3 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_instance.cpp.7D14F328651204D1.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_interwave_instance.cpp.8A9B71EB3B2C8B82.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_interwave_instance.cpp.8A9B71EB3B2C8B82.idx new file mode 100755 index 0000000000000000000000000000000000000000..217d397ea63c90a8fc77978337d8380fbc4db149 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_interwave_instance.cpp.8A9B71EB3B2C8B82.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v2_instance.cpp.CF381829133E9AFB.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v2_instance.cpp.CF381829133E9AFB.idx new file mode 100755 index 0000000000000000000000000000000000000000..974952dadc8b3965b87a91dbe27e9ec74db1b3f3 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v2_instance.cpp.CF381829133E9AFB.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_kpb128_instance.cpp.117C2517C948C007.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_kpb128_instance.cpp.117C2517C948C007.idx new file mode 100755 index 0000000000000000000000000000000000000000..4d2b66e4f3cc1e7a660ede77c035760aaf516a23 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_kpb128_instance.cpp.117C2517C948C007.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_instance.cpp.C13174B8631F1EA5.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_instance.cpp.C13174B8631F1EA5.idx new file mode 100755 index 0000000000000000000000000000000000000000..be942abff4fcfc21a64587bb25b991ba575ac86a Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_instance.cpp.C13174B8631F1EA5.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_interwave_instance.cpp.8596E811350866B4.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_interwave_instance.cpp.8596E811350866B4.idx new file mode 100755 index 0000000000000000000000000000000000000000..3d5815f56c85b2ae288b9a2e24a5267231034a64 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_interwave_instance.cpp.8596E811350866B4.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v2_instance.cpp.09DE6214A3D6FEFC.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v2_instance.cpp.09DE6214A3D6FEFC.idx new file mode 100755 index 0000000000000000000000000000000000000000..90d0939061a1495e00fe4a61bcba7d20d94a7949 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v2_instance.cpp.09DE6214A3D6FEFC.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp.515130D1DB38B8AB.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp.515130D1DB38B8AB.idx new file mode 100755 index 0000000000000000000000000000000000000000..829ae55a62e4620ee54d42613d4a0b81bf1e64a7 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp.515130D1DB38B8AB.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp.252C84373128A03D.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp.252C84373128A03D.idx new file mode 100755 index 0000000000000000000000000000000000000000..5ea6218b0cf1ab5aa72ad601c02d33f137f73906 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp.252C84373128A03D.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp.6BA0BD5269CBABBA.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp.6BA0BD5269CBABBA.idx new file mode 100755 index 0000000000000000000000000000000000000000..867fcbb5fb16483aa9466be4946d4cee474ac07b Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp.6BA0BD5269CBABBA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp.CFD1881950858A56.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp.CFD1881950858A56.idx new file mode 100755 index 0000000000000000000000000000000000000000..94e3ce438b59775367fe76d2328d72c342124f02 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp.CFD1881950858A56.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp.9CC79FF6A90215E2.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp.9CC79FF6A90215E2.idx new file mode 100755 index 0000000000000000000000000000000000000000..91d0f1a25411eb76731a1b075f62568d950a6875 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp.9CC79FF6A90215E2.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp.577D62F548B80AD3.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp.577D62F548B80AD3.idx new file mode 100755 index 0000000000000000000000000000000000000000..f417033e307a71d67ed5b161d9e23bcce7030b50 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp.577D62F548B80AD3.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_instance.cpp.FC0BE921C33562AA.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_instance.cpp.FC0BE921C33562AA.idx new file mode 100755 index 0000000000000000000000000000000000000000..2075c68304fbb5b01e516d794ced9ee8b069912d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_instance.cpp.FC0BE921C33562AA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.5E4A00F1466A4D4D.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.5E4A00F1466A4D4D.idx new file mode 100755 index 0000000000000000000000000000000000000000..29f532be88cf6d8b75c4424b93631e06bde0b128 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.5E4A00F1466A4D4D.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v2_instance.cpp.64CDFD8CF1246F3A.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v2_instance.cpp.64CDFD8CF1246F3A.idx new file mode 100755 index 0000000000000000000000000000000000000000..c54d140bd4086504175562414a8fa37bf67a95ef Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v2_instance.cpp.64CDFD8CF1246F3A.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp.38A1F46E05CA49C4.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp.38A1F46E05CA49C4.idx new file mode 100755 index 0000000000000000000000000000000000000000..c5554e14562140ef4f0ab0add3819548242ff4fb Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp.38A1F46E05CA49C4.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6DFDC664FAC2C818.idx b/.cache/clangd/index/device_gemm_xdl_splitk_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6DFDC664FAC2C818.idx new file mode 100755 index 0000000000000000000000000000000000000000..f55842224b183ad0c751dfae63544e3152aa19b5 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6DFDC664FAC2C818.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_streamk.hpp.424C1309002226DA.idx b/.cache/clangd/index/device_gemm_xdl_streamk.hpp.424C1309002226DA.idx new file mode 100755 index 0000000000000000000000000000000000000000..e56552be455664a6465e4db8d56c7fe812b7397e Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_streamk.hpp.424C1309002226DA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp.16580B63D7D6888C.idx b/.cache/clangd/index/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp.16580B63D7D6888C.idx new file mode 100755 index 0000000000000000000000000000000000000000..46b6915ed3125ef9954cd568d6ae82b1679a712d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp.16580B63D7D6888C.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_waveletmodel_cshuffle.hpp.6149EF88F8FEFF53.idx b/.cache/clangd/index/device_gemm_xdl_waveletmodel_cshuffle.hpp.6149EF88F8FEFF53.idx new file mode 100755 index 0000000000000000000000000000000000000000..213dca0ac4b403918dc0b391a1b42a3cbc5469f9 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_waveletmodel_cshuffle.hpp.6149EF88F8FEFF53.idx differ diff --git a/.cache/clangd/index/device_grouped_contraction_multiple_d.hpp.D8736023CD2927DC.idx b/.cache/clangd/index/device_grouped_contraction_multiple_d.hpp.D8736023CD2927DC.idx new file mode 100755 index 0000000000000000000000000000000000000000..36d4244faa9a266178eeedf424a3630d0317fe9e Binary files /dev/null and b/.cache/clangd/index/device_grouped_contraction_multiple_d.hpp.D8736023CD2927DC.idx differ diff --git a/.cache/clangd/index/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp.A6618A549365EFCF.idx b/.cache/clangd/index/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp.A6618A549365EFCF.idx new file mode 100755 index 0000000000000000000000000000000000000000..5ecbcfc7708aff6165896322881a5279012af775 Binary files /dev/null and b/.cache/clangd/index/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp.A6618A549365EFCF.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.A4DBB4F8B2C78E56.idx b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.A4DBB4F8B2C78E56.idx new file mode 100755 index 0000000000000000000000000000000000000000..af1ae5bec6418f67d98b7b9ed6885ad5d07ba9dd Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.A4DBB4F8B2C78E56.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.315EC2A3A6D6E638.idx b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.315EC2A3A6D6E638.idx new file mode 100755 index 0000000000000000000000000000000000000000..c812c861b6239ba023eb6c71d25311943c3759e0 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.315EC2A3A6D6E638.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.AA168C235A47B299.idx b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.AA168C235A47B299.idx new file mode 100755 index 0000000000000000000000000000000000000000..e0b46eafc26aa6fa8acf9d997099244aa10a6656 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.AA168C235A47B299.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.FA613A1A341E3811.idx b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.FA613A1A341E3811.idx new file mode 100755 index 0000000000000000000000000000000000000000..9c86645709e40d3707273de0d0968c1649ee5e16 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.FA613A1A341E3811.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.F48B669764788B12.idx b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.F48B669764788B12.idx new file mode 100755 index 0000000000000000000000000000000000000000..6cf805a8fb537452564e05401ebc83119f650f8a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.F48B669764788B12.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.B296FF7D04FDFC90.idx b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.B296FF7D04FDFC90.idx new file mode 100755 index 0000000000000000000000000000000000000000..f7f0c76b1a2074a515f4b979d1ed994ac455a2e5 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.B296FF7D04FDFC90.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp.DDED95C8C9D22159.idx b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp.DDED95C8C9D22159.idx new file mode 100755 index 0000000000000000000000000000000000000000..d5f65c9d14407caa0e95bc6287afac69eefc504b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp.DDED95C8C9D22159.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.15D71B57C7DA4CC3.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.15D71B57C7DA4CC3.idx new file mode 100755 index 0000000000000000000000000000000000000000..251fc60ead8429cacab75ac55b2f1f10f6b24567 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.15D71B57C7DA4CC3.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.1D323BBA44E4F194.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.1D323BBA44E4F194.idx new file mode 100755 index 0000000000000000000000000000000000000000..5d85d8f7026d737c56224af9ea17322d206abea1 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.1D323BBA44E4F194.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.900C533B3DB605B7.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.900C533B3DB605B7.idx new file mode 100755 index 0000000000000000000000000000000000000000..fba666d07ce5bedd4d00e55829b16eedab949ab4 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.900C533B3DB605B7.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.5AFEEB9815EAB2BB.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.5AFEEB9815EAB2BB.idx new file mode 100755 index 0000000000000000000000000000000000000000..cb445fed7936a4f404e901f8e38b187ec2634a36 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.5AFEEB9815EAB2BB.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.BC2C6EDB175EB210.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.BC2C6EDB175EB210.idx new file mode 100755 index 0000000000000000000000000000000000000000..bc08aeea6c2f804fea2357d55d1db93f18bb59a4 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.BC2C6EDB175EB210.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.8A9EF4FB27AD09E7.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.8A9EF4FB27AD09E7.idx new file mode 100755 index 0000000000000000000000000000000000000000..5935c162deb67cd51ad81a94a3b8ef45655cfde3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.8A9EF4FB27AD09E7.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.DE2A54B24DD9CFDF.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.DE2A54B24DD9CFDF.idx new file mode 100755 index 0000000000000000000000000000000000000000..a34e1b08e6fe9ec651d316dee95228c314b7f806 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.DE2A54B24DD9CFDF.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.EBA14072309C61B4.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.EBA14072309C61B4.idx new file mode 100755 index 0000000000000000000000000000000000000000..f2115b0162ea605a77f1da4c4643c1135144745b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.EBA14072309C61B4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.5B618BE41EFBBD23.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.5B618BE41EFBBD23.idx new file mode 100755 index 0000000000000000000000000000000000000000..95bbb25faadff3982d5aa4c25f0ec4c52b8fdf9a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.5B618BE41EFBBD23.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.6161CF2711103FB3.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.6161CF2711103FB3.idx new file mode 100755 index 0000000000000000000000000000000000000000..bc2186bfb77873b2f297f92a21426f5b38b524a3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.6161CF2711103FB3.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.77ED46D094B0B411.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.77ED46D094B0B411.idx new file mode 100755 index 0000000000000000000000000000000000000000..1362c78f922ae925e199db0db5e5b12f4e40408a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.77ED46D094B0B411.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.07FF698C2C4EA5B3.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.07FF698C2C4EA5B3.idx new file mode 100755 index 0000000000000000000000000000000000000000..9b37d375e7c66b5614b42f3008aa35604bfbc934 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.07FF698C2C4EA5B3.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.1FAF079E735BA4BC.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.1FAF079E735BA4BC.idx new file mode 100755 index 0000000000000000000000000000000000000000..1e80350eeaa73223ebee729ec01c36e975cd933b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.1FAF079E735BA4BC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.FD07473382303B16.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.FD07473382303B16.idx new file mode 100755 index 0000000000000000000000000000000000000000..942505d1008d5c9f2a8c780d716bc37be3a1dfea Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.FD07473382303B16.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.B6F367ED6FE27214.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.B6F367ED6FE27214.idx new file mode 100755 index 0000000000000000000000000000000000000000..643d64a7aa62c695b58fece978e6829945bc0e08 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.B6F367ED6FE27214.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.BE1B4F87BC348C49.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.BE1B4F87BC348C49.idx new file mode 100755 index 0000000000000000000000000000000000000000..0732144486a019e863162d3bf262ead834d74476 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.BE1B4F87BC348C49.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.F02158140F1CDE9D.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.F02158140F1CDE9D.idx new file mode 100755 index 0000000000000000000000000000000000000000..9699169ee86b357a11b13c9a5de1efe42a1383ec Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.F02158140F1CDE9D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.9325EE22E541E919.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.9325EE22E541E919.idx new file mode 100755 index 0000000000000000000000000000000000000000..076ed46fd7f6df789c5da07fe0790c82ea5968bf Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.9325EE22E541E919.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.BBF8A6B4ABA52D27.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.BBF8A6B4ABA52D27.idx new file mode 100755 index 0000000000000000000000000000000000000000..9d5c2ab70aa1e717b86b75bb9fb4e1975ab2aa30 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.BBF8A6B4ABA52D27.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.CE7231939A842BC9.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.CE7231939A842BC9.idx new file mode 100755 index 0000000000000000000000000000000000000000..f444d4b643a6a4da69705bfc4752880721253729 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.CE7231939A842BC9.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp.5A5CAFE59DAED728.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp.5A5CAFE59DAED728.idx new file mode 100755 index 0000000000000000000000000000000000000000..d55f2b519c6bebb6eedb942d42494dbc0974e844 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp.5A5CAFE59DAED728.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.B9B0155CE4F94655.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.B9B0155CE4F94655.idx new file mode 100755 index 0000000000000000000000000000000000000000..6c57d54ea50d7e45cb65ddf3a1ac158c00b1ff71 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.B9B0155CE4F94655.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.F1D87E46323F05EC.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.F1D87E46323F05EC.idx new file mode 100755 index 0000000000000000000000000000000000000000..cff8e92891494f96237b51e0db83c11757a9ceee Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.F1D87E46323F05EC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp.B674F131151301E0.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp.B674F131151301E0.idx new file mode 100755 index 0000000000000000000000000000000000000000..43f502c7223d96dffaafd3d9ebcd052e52fe3fb7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp.B674F131151301E0.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp.E06437315FCA8871.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp.E06437315FCA8871.idx new file mode 100755 index 0000000000000000000000000000000000000000..8ec805915378ed5d6b04d579c69adbeaa96cd6bb Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp.E06437315FCA8871.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.159F0712016AF057.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.159F0712016AF057.idx new file mode 100755 index 0000000000000000000000000000000000000000..34f2304c152954022aa6c0bfbae1289fd40741a3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.159F0712016AF057.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.4C82308AB1780753.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.4C82308AB1780753.idx new file mode 100755 index 0000000000000000000000000000000000000000..162b541d72508544a2227a0ab3a690668c0f483b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.4C82308AB1780753.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp.872D44B53EF52F10.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp.872D44B53EF52F10.idx new file mode 100755 index 0000000000000000000000000000000000000000..73c15dfafcfd75ed2c2d7b5ac868a5520573f68f Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp.872D44B53EF52F10.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp.83CDF2CC13C4A415.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp.83CDF2CC13C4A415.idx new file mode 100755 index 0000000000000000000000000000000000000000..d216ce4118f07c6cb21d6330b9737452139907f6 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp.83CDF2CC13C4A415.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.A4A9E7E93B446A25.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.A4A9E7E93B446A25.idx new file mode 100755 index 0000000000000000000000000000000000000000..6abf02a098514305c7b586d23a7692d77272cd46 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.A4A9E7E93B446A25.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.729E5DD280A9E536.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.729E5DD280A9E536.idx new file mode 100755 index 0000000000000000000000000000000000000000..f1551a55494c824336362fb8a0397f1483ddcd02 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.729E5DD280A9E536.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp.649F61751C48D48D.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp.649F61751C48D48D.idx new file mode 100755 index 0000000000000000000000000000000000000000..55c61152843612c1a428cb8e90573e0adca4c1a7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp.649F61751C48D48D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp.8A0C33E6B1E03B1D.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp.8A0C33E6B1E03B1D.idx new file mode 100755 index 0000000000000000000000000000000000000000..cc5dc9dda0859e16e3f64555172ff01a2a7d6a75 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp.8A0C33E6B1E03B1D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.8C4CEC479CC69615.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.8C4CEC479CC69615.idx new file mode 100755 index 0000000000000000000000000000000000000000..1c122fdd680951b7416ef5c3c424c54fee3f546b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.8C4CEC479CC69615.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.D16C834304C1756E.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.D16C834304C1756E.idx new file mode 100755 index 0000000000000000000000000000000000000000..1953972b69bab58ac424b0d1243ffa67dcf9abb1 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.D16C834304C1756E.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp.1399C252DECC9B8A.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp.1399C252DECC9B8A.idx new file mode 100755 index 0000000000000000000000000000000000000000..f2c3bd813c11afbc2d7045e3f6a7c795120ab922 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp.1399C252DECC9B8A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.3060AD718DAF4F4F.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.3060AD718DAF4F4F.idx new file mode 100755 index 0000000000000000000000000000000000000000..63bf47e7b9e05a74f3a95a03a620fca4b15292d4 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.3060AD718DAF4F4F.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.8A08D10AF6807EE2.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.8A08D10AF6807EE2.idx new file mode 100755 index 0000000000000000000000000000000000000000..76ccc1117a34ccb36f5631e19a16583cebd217e2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.8A08D10AF6807EE2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.FE154FD36F572C54.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.FE154FD36F572C54.idx new file mode 100755 index 0000000000000000000000000000000000000000..b066f8d4df1255d21c2811a2e580c64ed541180a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.FE154FD36F572C54.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.56FD421A51C8C5E1.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.56FD421A51C8C5E1.idx new file mode 100755 index 0000000000000000000000000000000000000000..bf92734dd74b0c6a9e404c22c2952bc9a6ead8d7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.56FD421A51C8C5E1.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.C29BF8AC2D59E669.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.C29BF8AC2D59E669.idx new file mode 100755 index 0000000000000000000000000000000000000000..f56ac5818db7a2777a39403530bfa968089abf67 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.C29BF8AC2D59E669.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.9341E131FE1393D1.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.9341E131FE1393D1.idx new file mode 100755 index 0000000000000000000000000000000000000000..6eb4a8d103f2a25c10a1ddf6a5be0ec17b354ad5 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.9341E131FE1393D1.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.11490B4C8E752971.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.11490B4C8E752971.idx new file mode 100755 index 0000000000000000000000000000000000000000..268450253592331152859a554aa7c374a3d8f392 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.11490B4C8E752971.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.ED0E8CE7C6A61531.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.ED0E8CE7C6A61531.idx new file mode 100755 index 0000000000000000000000000000000000000000..a49d19b6aaa3ef9fab1ea9930c14192a836b8521 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.ED0E8CE7C6A61531.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5ED435B77281FBCD.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5ED435B77281FBCD.idx new file mode 100755 index 0000000000000000000000000000000000000000..b008b50fbf2cdefe41c715a52a623fdba733f7ab Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5ED435B77281FBCD.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.069AF0356549719D.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.069AF0356549719D.idx new file mode 100755 index 0000000000000000000000000000000000000000..ad5fa11289998a4db533db1fb07065837b6d5e39 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.069AF0356549719D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.511C2FDB97E265FC.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.511C2FDB97E265FC.idx new file mode 100755 index 0000000000000000000000000000000000000000..02fedb695bf19c2491459a3bdaffafa59ba1acd3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.511C2FDB97E265FC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.32DD26A1AFC28FC7.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.32DD26A1AFC28FC7.idx new file mode 100755 index 0000000000000000000000000000000000000000..a677cb2d627c948b34f0ca9d19c08dd0b2676cd3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.32DD26A1AFC28FC7.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.72C23D8E55C505F5.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.72C23D8E55C505F5.idx new file mode 100755 index 0000000000000000000000000000000000000000..39f18410251c267159574e496842ef7c456ccf4c Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.72C23D8E55C505F5.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.80E125D05CA137B9.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.80E125D05CA137B9.idx new file mode 100755 index 0000000000000000000000000000000000000000..7b08c378f055cc8e30eb019d39674f6c22b9e85c Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.80E125D05CA137B9.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.663BA57C2C8487A1.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.663BA57C2C8487A1.idx new file mode 100755 index 0000000000000000000000000000000000000000..232e13b8fe0778cf7f7bcf32dd82d46d461306ce Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.663BA57C2C8487A1.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.FDD27F96EE937A50.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.FDD27F96EE937A50.idx new file mode 100755 index 0000000000000000000000000000000000000000..d03305666e48649e0e25ea1440e2a926b276693f Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.FDD27F96EE937A50.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.33C6E967995C7F50.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.33C6E967995C7F50.idx new file mode 100755 index 0000000000000000000000000000000000000000..5885bec4d7d8df3004251276efd37aa00a06fbc7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.33C6E967995C7F50.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.59DE1C78AB7D5F80.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.59DE1C78AB7D5F80.idx new file mode 100755 index 0000000000000000000000000000000000000000..2b6a79c1e826f48a441594ef00bb3c47e7e98a95 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.59DE1C78AB7D5F80.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.F3F4F9DA4C0F8BDA.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.F3F4F9DA4C0F8BDA.idx new file mode 100755 index 0000000000000000000000000000000000000000..0952cde8b8bd1055ba630636a76a371bf6d68ba5 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.F3F4F9DA4C0F8BDA.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.A371B626E37E8E07.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.A371B626E37E8E07.idx new file mode 100755 index 0000000000000000000000000000000000000000..7246b0fcbd7ccd6e9e03554bacdd7f75e4add066 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.A371B626E37E8E07.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.AB40F2DD408B7367.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.AB40F2DD408B7367.idx new file mode 100755 index 0000000000000000000000000000000000000000..d69c3185b2869b992cc3eab89fbff78732cd89df Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.AB40F2DD408B7367.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.18F54267E4ABEC46.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.18F54267E4ABEC46.idx new file mode 100755 index 0000000000000000000000000000000000000000..8556d42bbc7f339431c0bc8088b275064e56ad03 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.18F54267E4ABEC46.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.192FAAC8A46AFF2C.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.192FAAC8A46AFF2C.idx new file mode 100755 index 0000000000000000000000000000000000000000..68b38b8a820917c75e4668b374c74d22322de6fb Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.192FAAC8A46AFF2C.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp.F56F1203139F8DB9.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp.F56F1203139F8DB9.idx new file mode 100755 index 0000000000000000000000000000000000000000..1bd12f4cceced40be843622eb05940181635900a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp.F56F1203139F8DB9.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.4C04B7436EBF8811.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.4C04B7436EBF8811.idx new file mode 100755 index 0000000000000000000000000000000000000000..abec3628c9e4d15b18064042e7af8593ae41c09b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.4C04B7436EBF8811.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.D8BA94FD49F7A399.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.D8BA94FD49F7A399.idx new file mode 100755 index 0000000000000000000000000000000000000000..54db1ed90414c6586060ac87d6a9e4ed4b98bbe8 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.D8BA94FD49F7A399.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C14D5D3E331E58A4.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C14D5D3E331E58A4.idx new file mode 100755 index 0000000000000000000000000000000000000000..21ad6329a1804c7cef5c0927aa1c9b2a3e7e3e8b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C14D5D3E331E58A4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.066E2D36BE87D012.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.066E2D36BE87D012.idx new file mode 100755 index 0000000000000000000000000000000000000000..15bde06a442a46d6da38e606baad7d30715f2e65 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.066E2D36BE87D012.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.341F4486D5AB6E5A.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.341F4486D5AB6E5A.idx new file mode 100755 index 0000000000000000000000000000000000000000..e00d96c3b5eebcf19aed06ecd2d50ebd9be8ff7e Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.341F4486D5AB6E5A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5A4CE3D80E2AF506.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5A4CE3D80E2AF506.idx new file mode 100755 index 0000000000000000000000000000000000000000..082cd833d7115efafe88b7f9bd8b4fbd3ed61a7b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5A4CE3D80E2AF506.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.403AE32B0F021E71.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.403AE32B0F021E71.idx new file mode 100755 index 0000000000000000000000000000000000000000..9cc1dfb06b718971750480fa8ec3a774d9afa040 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.403AE32B0F021E71.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.9C8F172FA791F403.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.9C8F172FA791F403.idx new file mode 100755 index 0000000000000000000000000000000000000000..0fe87938410225c1992401f716cad528d81a7d82 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.9C8F172FA791F403.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.2C419A4DCD2F2168.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.2C419A4DCD2F2168.idx new file mode 100755 index 0000000000000000000000000000000000000000..148099c580cdc7b61032efccef0028489877aa17 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.2C419A4DCD2F2168.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.217BA51B10EED1BA.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.217BA51B10EED1BA.idx new file mode 100755 index 0000000000000000000000000000000000000000..a71092c3a33e0f75bd29e51e0df009f0a02aaf4a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.217BA51B10EED1BA.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.CE8AF0B7DC8618CC.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.CE8AF0B7DC8618CC.idx new file mode 100755 index 0000000000000000000000000000000000000000..4fa095847156547d88f8247c0eeb50c392081bd1 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.CE8AF0B7DC8618CC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.8B802336DBA274D4.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.8B802336DBA274D4.idx new file mode 100755 index 0000000000000000000000000000000000000000..2539d7d22c3f255e1c8274ac0181d9058053eba8 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.8B802336DBA274D4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.17BBA75937C47055.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.17BBA75937C47055.idx new file mode 100755 index 0000000000000000000000000000000000000000..aeb9b1607dd1938801a8fa9e9beedfb3a7773a5a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.17BBA75937C47055.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.9C7E5EE7984A7775.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.9C7E5EE7984A7775.idx new file mode 100755 index 0000000000000000000000000000000000000000..c3369396945c2f68b02cc40408beefd2ab1208bf Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.9C7E5EE7984A7775.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.15DDB04E41B23392.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.15DDB04E41B23392.idx new file mode 100755 index 0000000000000000000000000000000000000000..23a7bbc48f125287f920a66862a0a7649543420f Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.15DDB04E41B23392.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.C031B76FFFB0A20D.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.C031B76FFFB0A20D.idx new file mode 100755 index 0000000000000000000000000000000000000000..acf2197547d4612fddeea1cafab60c70fdd67ba5 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.C031B76FFFB0A20D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.C7145A60489EBDB5.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.C7145A60489EBDB5.idx new file mode 100755 index 0000000000000000000000000000000000000000..933a2277d3053c7948f0b47ab0a8e2c51dab8349 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.C7145A60489EBDB5.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C5AE3E9229A8EFB4.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C5AE3E9229A8EFB4.idx new file mode 100755 index 0000000000000000000000000000000000000000..fd55c7f849cf562da8acb17b6f14b23fadc2e921 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C5AE3E9229A8EFB4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp.8F52AF019F02B944.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp.8F52AF019F02B944.idx new file mode 100755 index 0000000000000000000000000000000000000000..465fc541e4e1582f2ce211b116cd0fc753afea50 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp.8F52AF019F02B944.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.3C3BEC3EC7808D5A.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.3C3BEC3EC7808D5A.idx new file mode 100755 index 0000000000000000000000000000000000000000..113a03e7260bf0549f336acda17ce278236587c5 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.3C3BEC3EC7808D5A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.2554570633260D87.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.2554570633260D87.idx new file mode 100755 index 0000000000000000000000000000000000000000..098bb82fe29eb34ab331b244644ed28b3460bdac Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.2554570633260D87.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp.D8F2815AC5D0D7D6.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp.D8F2815AC5D0D7D6.idx new file mode 100755 index 0000000000000000000000000000000000000000..62ef0954b94146ba0c0bc194fd78334888823d35 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp.D8F2815AC5D0D7D6.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp.0FC1DC5FE52A0C60.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp.0FC1DC5FE52A0C60.idx new file mode 100755 index 0000000000000000000000000000000000000000..de60f391d277ba3cc605023fc82260c7996e5565 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp.0FC1DC5FE52A0C60.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.0151E7B579E72F93.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.0151E7B579E72F93.idx new file mode 100755 index 0000000000000000000000000000000000000000..88485d710a33272dd3d80fbf6b300b5080a93007 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.0151E7B579E72F93.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.C058D2A1DEE10686.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.C058D2A1DEE10686.idx new file mode 100755 index 0000000000000000000000000000000000000000..8bad777c3c2a8328e2ffc0e8ae52d348e634a25f Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.C058D2A1DEE10686.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp.278F3C311504F9EC.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp.278F3C311504F9EC.idx new file mode 100755 index 0000000000000000000000000000000000000000..c59c78b58e8ccd853f025a90c21c55c2330960cd Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp.278F3C311504F9EC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp.8DE51E7D513808A9.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp.8DE51E7D513808A9.idx new file mode 100755 index 0000000000000000000000000000000000000000..54811546169da0b4913fb06d00c144ce5db5166e Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp.8DE51E7D513808A9.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.B3BD1E3E3F6E4877.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.B3BD1E3E3F6E4877.idx new file mode 100755 index 0000000000000000000000000000000000000000..8d46b8ee94b618ea5936163cbeb9c2679fa5977b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.B3BD1E3E3F6E4877.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3955AFFBF9D5A2EF.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3955AFFBF9D5A2EF.idx new file mode 100755 index 0000000000000000000000000000000000000000..fc6f51d6da540c775391534da6c16fcb7bd281fa Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3955AFFBF9D5A2EF.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp.C97D071C6295D5F3.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp.C97D071C6295D5F3.idx new file mode 100755 index 0000000000000000000000000000000000000000..ea24c856958788adf953739f34b8fbc33692bd47 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp.C97D071C6295D5F3.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp.E621A4FA60DE216B.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp.E621A4FA60DE216B.idx new file mode 100755 index 0000000000000000000000000000000000000000..6f7643f8d8c84f39a7aa4c7fc6f516ae99fb2066 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp.E621A4FA60DE216B.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.06F3665DC4884ADD.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.06F3665DC4884ADD.idx new file mode 100755 index 0000000000000000000000000000000000000000..aa7a7231bdaf23f43df8b2def1cdf7393bbfecdc Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.06F3665DC4884ADD.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.8A48A77FE8BA877A.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.8A48A77FE8BA877A.idx new file mode 100755 index 0000000000000000000000000000000000000000..2ae0b599b1e17b7fa2cb5ecd1e669f2a10b5bcc1 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.8A48A77FE8BA877A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp.85809BA7EA818954.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp.85809BA7EA818954.idx new file mode 100755 index 0000000000000000000000000000000000000000..c72734627f57cea8b7c71694305fa6487375dc8f Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp.85809BA7EA818954.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.FB2E0A9CD009454C.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.FB2E0A9CD009454C.idx new file mode 100755 index 0000000000000000000000000000000000000000..06ac96331eb69e12791e963b0be56407b51b6b75 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.FB2E0A9CD009454C.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.21AF094B4B66A48D.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.21AF094B4B66A48D.idx new file mode 100755 index 0000000000000000000000000000000000000000..337051c91a69355331ca726759da8833d268d7eb Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.21AF094B4B66A48D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.D78BE8BB81161A14.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.D78BE8BB81161A14.idx new file mode 100755 index 0000000000000000000000000000000000000000..c69162528d72ddb5f6f7b7101437df2373583daa Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.D78BE8BB81161A14.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.143C90F3DEDC2341.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.143C90F3DEDC2341.idx new file mode 100755 index 0000000000000000000000000000000000000000..3194548487954adf1c7c62f6eefab3393a751ad9 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.143C90F3DEDC2341.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.426F56794F68AE44.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.426F56794F68AE44.idx new file mode 100755 index 0000000000000000000000000000000000000000..7fd04b4823a2b73300e63d3687cddd3d0e90790d Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.426F56794F68AE44.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.1C2FE2C88C10C9D6.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.1C2FE2C88C10C9D6.idx new file mode 100755 index 0000000000000000000000000000000000000000..b92e640c5bc5e985caf4bab1c491e5a8b04244ae Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.1C2FE2C88C10C9D6.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.BB1A75845E7DE245.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.BB1A75845E7DE245.idx new file mode 100755 index 0000000000000000000000000000000000000000..1241ecb56209cbadbf8cd52001d6b46c0ea155ce Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.BB1A75845E7DE245.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp.A1E84AD887D755DB.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp.A1E84AD887D755DB.idx new file mode 100755 index 0000000000000000000000000000000000000000..31fa84a2ecb9b0774098ff1269ba088a9e5dc487 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp.A1E84AD887D755DB.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.43CB1A6BDABED53D.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.43CB1A6BDABED53D.idx new file mode 100755 index 0000000000000000000000000000000000000000..a81cda35c32aa8e09d086b2fc9e5480ec5149a3d Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.43CB1A6BDABED53D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp.336E2C49F8565116.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp.336E2C49F8565116.idx new file mode 100755 index 0000000000000000000000000000000000000000..6ecb7e5ea71e9e58ab0a7f7c11ab6e8cc5677889 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp.336E2C49F8565116.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.117E24CA3DC21528.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.117E24CA3DC21528.idx new file mode 100755 index 0000000000000000000000000000000000000000..fa4464ae0369b187b769133a6775d0434d371244 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.117E24CA3DC21528.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.59CFAC251237A8A2.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.59CFAC251237A8A2.idx new file mode 100755 index 0000000000000000000000000000000000000000..a5ea514f8b07d233240d9f87d953c0e0b393e786 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.59CFAC251237A8A2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B76F39C8DBBE2FD2.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B76F39C8DBBE2FD2.idx new file mode 100755 index 0000000000000000000000000000000000000000..f3e1cdc2720aea2a4a567515e56a074c353de0b0 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B76F39C8DBBE2FD2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.9F991AFB3119C4B9.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.9F991AFB3119C4B9.idx new file mode 100755 index 0000000000000000000000000000000000000000..cb5d1e0def03047a452ffa40395d46f25a679d26 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.9F991AFB3119C4B9.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.A3477CD2DF481BB7.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.A3477CD2DF481BB7.idx new file mode 100755 index 0000000000000000000000000000000000000000..dde2eb592d778f0113532e33a6f3a6c8f4423d51 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.A3477CD2DF481BB7.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C60AD9A6203D4F40.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C60AD9A6203D4F40.idx new file mode 100755 index 0000000000000000000000000000000000000000..2e9354759fdf73fbd5cffcfe0075ad8b55231de2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C60AD9A6203D4F40.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B74C847DC291EB3D.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B74C847DC291EB3D.idx new file mode 100755 index 0000000000000000000000000000000000000000..0ea6f449406e2af231bdf5ff24cd482719269c29 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B74C847DC291EB3D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.91739D7EC5D8C4E5.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.91739D7EC5D8C4E5.idx new file mode 100755 index 0000000000000000000000000000000000000000..702365b4cebb59253388ed2cd181750bdeb94156 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.91739D7EC5D8C4E5.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.612D072BD9DA9C78.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.612D072BD9DA9C78.idx new file mode 100755 index 0000000000000000000000000000000000000000..62eecf9d13bb8cb83cae21822c7f8c88fbb2b037 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.612D072BD9DA9C78.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.91C96AAC7F5772DF.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.91C96AAC7F5772DF.idx new file mode 100755 index 0000000000000000000000000000000000000000..67a6b90b1c63fa2ae11eabd841f566fcae6801ae Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.91C96AAC7F5772DF.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.A06BD8EFCD977C2D.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.A06BD8EFCD977C2D.idx new file mode 100755 index 0000000000000000000000000000000000000000..b256627d3274602e0e296c41d9f99e8308cd9746 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.A06BD8EFCD977C2D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.0F35369ACDDEB6D2.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.0F35369ACDDEB6D2.idx new file mode 100755 index 0000000000000000000000000000000000000000..92f36103e52cbc007e4d5bdaab112791a780994e Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.0F35369ACDDEB6D2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3DF5C2CC3145E799.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3DF5C2CC3145E799.idx new file mode 100755 index 0000000000000000000000000000000000000000..9fe62fba00dd0bf56203d3b8eab3b3be6b2c6387 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3DF5C2CC3145E799.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.B3BE2F1E169224A4.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.B3BE2F1E169224A4.idx new file mode 100755 index 0000000000000000000000000000000000000000..7e603dfdd03284a5aa453ac97e0767e242e878d2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.B3BE2F1E169224A4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.ED09CF7C1628C3B6.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.ED09CF7C1628C3B6.idx new file mode 100755 index 0000000000000000000000000000000000000000..0140ef4eb2a892d04d592b570bc269990bc42606 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.ED09CF7C1628C3B6.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d.hpp.CB0E4FDBA2354B83.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d.hpp.CB0E4FDBA2354B83.idx new file mode 100755 index 0000000000000000000000000000000000000000..1a8bdc1f85839ed72c45e5a0853db2f46b0c5ab4 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d.hpp.CB0E4FDBA2354B83.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp.62AEEAEB4FE47662.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp.62AEEAEB4FE47662.idx new file mode 100755 index 0000000000000000000000000000000000000000..fb08a94c3a047111e1255e4a6e7d914cf15d98ba Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp.62AEEAEB4FE47662.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp.DA7CD25895EF8940.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp.DA7CD25895EF8940.idx new file mode 100755 index 0000000000000000000000000000000000000000..762868bb73294b09ec3c1f74bfe42ed1b68c2e54 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp.DA7CD25895EF8940.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_f16_instance.hpp.FE4A66EFBF344CEC.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_f16_instance.hpp.FE4A66EFBF344CEC.idx new file mode 100755 index 0000000000000000000000000000000000000000..feb8f66eca14c4923200144d07c5e2c37415b378 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_f16_instance.hpp.FE4A66EFBF344CEC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_i8_instance.hpp.22BF4E121F8B1D63.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_i8_instance.hpp.22BF4E121F8B1D63.idx new file mode 100755 index 0000000000000000000000000000000000000000..b40a34ad4aca94fa026aad16948b6f3af4cbb4aa Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_i8_instance.hpp.22BF4E121F8B1D63.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp.8935210F01B214E2.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp.8935210F01B214E2.idx new file mode 100755 index 0000000000000000000000000000000000000000..146798544a26c57283d67222919074d5dbe77bc3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp.8935210F01B214E2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_instance.hpp.7A273F666FE77357.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_instance.hpp.7A273F666FE77357.idx new file mode 100755 index 0000000000000000000000000000000000000000..ef82df0d6f989f50d6642d5b1034674afb6a4a51 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_instance.hpp.7A273F666FE77357.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_scale_instance.hpp.A306BD940C6CC8E3.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_scale_instance.hpp.A306BD940C6CC8E3.idx new file mode 100755 index 0000000000000000000000000000000000000000..d25ebcfe9b3a2719f7aebffe40fcf38609c44a93 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_scale_instance.hpp.A306BD940C6CC8E3.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_weight.hpp.FB767EE60994A05E.idx b/.cache/clangd/index/device_grouped_conv_bwd_weight.hpp.FB767EE60994A05E.idx new file mode 100755 index 0000000000000000000000000000000000000000..831fc9e040f0edf39515f88276f2ea6a31912609 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_weight.hpp.FB767EE60994A05E.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp.2CEBA1D92F4EDC1A.idx b/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp.2CEBA1D92F4EDC1A.idx new file mode 100755 index 0000000000000000000000000000000000000000..b627f54ffe42a121497784b7c5b71b74e4e4c791 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp.2CEBA1D92F4EDC1A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_instance.hpp.D8A887BE715EE7ED.idx b/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_instance.hpp.D8A887BE715EE7ED.idx new file mode 100755 index 0000000000000000000000000000000000000000..e926f551d0487bf57aa124e7cfe6979a506b1a21 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_instance.hpp.D8A887BE715EE7ED.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp.7B0144E3F93B6769.idx b/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp.7B0144E3F93B6769.idx new file mode 100755 index 0000000000000000000000000000000000000000..295f53117dd6104939fdea443c6783a3e886001b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp.7B0144E3F93B6769.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_instance.hpp.DF771DCE13775534.idx b/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_instance.hpp.DF771DCE13775534.idx new file mode 100755 index 0000000000000000000000000000000000000000..8c93763ad6ca2407f64de90c0da166595dac6f45 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_instance.hpp.DF771DCE13775534.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd.hpp.ED9472BD269F359D.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd.hpp.ED9472BD269F359D.idx new file mode 100755 index 0000000000000000000000000000000000000000..d1baa52097344ba3a47da9d6fca2b8120fb99913 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd.hpp.ED9472BD269F359D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp.52A5FB3B86490259.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp.52A5FB3B86490259.idx new file mode 100755 index 0000000000000000000000000000000000000000..cb3f51f4ffa97cb56f8fb997055d92d225db8048 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp.52A5FB3B86490259.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_d.hpp.79D64EC0672BD2D2.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d.hpp.79D64EC0672BD2D2.idx new file mode 100755 index 0000000000000000000000000000000000000000..9d26b8e93e54eef8f282ac92cbbcecce91920372 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d.hpp.79D64EC0672BD2D2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r.hpp.4DCB681EEBE1DA60.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r.hpp.4DCB681EEBE1DA60.idx new file mode 100755 index 0000000000000000000000000000000000000000..cc53f0da458030bf660b2de7499762770647bc3d Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r.hpp.4DCB681EEBE1DA60.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp.CF5C02CB8B4910C1.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp.CF5C02CB8B4910C1.idx new file mode 100755 index 0000000000000000000000000000000000000000..33afdfd0402fd66a23e339ba59572a5857f9cfc4 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp.CF5C02CB8B4910C1.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp.EC51DA2544106248.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp.EC51DA2544106248.idx new file mode 100755 index 0000000000000000000000000000000000000000..a878430c418d3501689d43394aad5211a0324616 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp.EC51DA2544106248.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_wmma_instance.hpp.377DCBF55D8BBCBB.idx b/.cache/clangd/index/device_grouped_conv_fwd_wmma_instance.hpp.377DCBF55D8BBCBB.idx new file mode 100755 index 0000000000000000000000000000000000000000..47c0b2e1333ab35cb903b7c8ec39f05196dc82c3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_wmma_instance.hpp.377DCBF55D8BBCBB.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_xdl_bilinear_instance.hpp.4827161718A2E399.idx b/.cache/clangd/index/device_grouped_conv_fwd_xdl_bilinear_instance.hpp.4827161718A2E399.idx new file mode 100755 index 0000000000000000000000000000000000000000..0ef24882f59298226648174268588a99eeb3b831 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_xdl_bilinear_instance.hpp.4827161718A2E399.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_xdl_instance.hpp.DE209B7BB2124A4A.idx b/.cache/clangd/index/device_grouped_conv_fwd_xdl_instance.hpp.DE209B7BB2124A4A.idx new file mode 100755 index 0000000000000000000000000000000000000000..44b29922f7d187ad8964b7dff3f70e7dc0121a5a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_xdl_instance.hpp.DE209B7BB2124A4A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_xdl_scale_instance.hpp.F388F6485863F7A4.idx b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scale_instance.hpp.F388F6485863F7A4.idx new file mode 100755 index 0000000000000000000000000000000000000000..215542b142ddfb6fb940d23d1b4b2a59759be18c Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scale_instance.hpp.F388F6485863F7A4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp.043570DA9A94B804.idx b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp.043570DA9A94B804.idx new file mode 100755 index 0000000000000000000000000000000000000000..a36df96b394124952083077b0948b10d52cb243a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp.043570DA9A94B804.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp.8003C44E683CA082.idx b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp.8003C44E683CA082.idx new file mode 100755 index 0000000000000000000000000000000000000000..bfb90c0a149d38c30e3f7d390ff06ca6ff6cffeb Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp.8003C44E683CA082.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_utils.hpp.82861A708A167591.idx b/.cache/clangd/index/device_grouped_conv_utils.hpp.82861A708A167591.idx new file mode 100755 index 0000000000000000000000000000000000000000..10ea0f430d31189c53c1c26876e954de1e63161f Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_utils.hpp.82861A708A167591.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm.hpp.96241388E9D6D4CD.idx b/.cache/clangd/index/device_grouped_gemm.hpp.96241388E9D6D4CD.idx new file mode 100755 index 0000000000000000000000000000000000000000..ab02bf01c84537690f8604e15065b0c65007c566 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm.hpp.96241388E9D6D4CD.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp.B5F90340138DCBDD.idx b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp.B5F90340138DCBDD.idx new file mode 100755 index 0000000000000000000000000000000000000000..6f6f303c6c042687f9969b309192bc1e59762567 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp.B5F90340138DCBDD.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp.BC9F6A61DFACFE50.idx b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp.BC9F6A61DFACFE50.idx new file mode 100755 index 0000000000000000000000000000000000000000..a084a82038d83c059e4bc2cd10e9ab8799a13028 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp.BC9F6A61DFACFE50.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.F7CCE132C2B5DC0C.idx b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.F7CCE132C2B5DC0C.idx new file mode 100755 index 0000000000000000000000000000000000000000..f532a55879e20bc8ca92e361bfc8eed77c2c6bb2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.F7CCE132C2B5DC0C.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.9DF3794E443669AC.idx b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.9DF3794E443669AC.idx new file mode 100755 index 0000000000000000000000000000000000000000..b70f0cc02ad62e2154c2f3d4b872a402f8b8696a Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.9DF3794E443669AC.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_fixed_nk.hpp.6F8F8F18341308F9.idx b/.cache/clangd/index/device_grouped_gemm_fixed_nk.hpp.6F8F8F18341308F9.idx new file mode 100755 index 0000000000000000000000000000000000000000..3feaab5c0cf7fc315a6cedddbf8b4f2188029830 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_fixed_nk.hpp.6F8F8F18341308F9.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute.hpp.3C2429E11C534BCE.idx b/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute.hpp.3C2429E11C534BCE.idx new file mode 100755 index 0000000000000000000000000000000000000000..400472e974812fcf303814e4d6d1a83b4ab41dae Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute.hpp.3C2429E11C534BCE.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.75AE4D40F988002C.idx b/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.75AE4D40F988002C.idx new file mode 100755 index 0000000000000000000000000000000000000000..de53f1b7204ea938cbdafaf6245b1fb6f625e0d2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.75AE4D40F988002C.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_splitk.hpp.6F08C0BD625D58A2.idx b/.cache/clangd/index/device_grouped_gemm_splitk.hpp.6F08C0BD625D58A2.idx new file mode 100755 index 0000000000000000000000000000000000000000..77f39df6833ab46e3a46bfc76a5d5fcfb5801824 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_splitk.hpp.6F08C0BD625D58A2.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl.hpp.46AD692E57980C90.idx b/.cache/clangd/index/device_grouped_gemm_xdl.hpp.46AD692E57980C90.idx new file mode 100755 index 0000000000000000000000000000000000000000..721a30ab4eca4c3f0d5cff855c8aba1824ded8a2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl.hpp.46AD692E57980C90.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp.37D37DC699ECCA33.idx b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp.37D37DC699ECCA33.idx new file mode 100755 index 0000000000000000000000000000000000000000..7afaf0be7d49ab01c48821f8672f0ee122ae8c1c Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp.37D37DC699ECCA33.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp.1D6C6BE3C31CA169.idx b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp.1D6C6BE3C31CA169.idx new file mode 100755 index 0000000000000000000000000000000000000000..31e701f6796b2242691686246a1f25804137d2bc Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp.1D6C6BE3C31CA169.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.24FD07A2963099BE.idx b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.24FD07A2963099BE.idx new file mode 100755 index 0000000000000000000000000000000000000000..51d7446a6bf7c5e1ef21eb3cda15e643db72e350 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.24FD07A2963099BE.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.32B3D11C83B5AEA6.idx b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.32B3D11C83B5AEA6.idx new file mode 100755 index 0000000000000000000000000000000000000000..951c4901861bf6ac501ac6cca0499e20533befd7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.32B3D11C83B5AEA6.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk.hpp.4C6A715EB09F77E1.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk.hpp.4C6A715EB09F77E1.idx new file mode 100755 index 0000000000000000000000000000000000000000..9eb6fecae2defa8225bc597087307c4ea488e866 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk.hpp.4C6A715EB09F77E1.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_kn_mn_instance.cpp.47280D4E92EC5D1F.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_kn_mn_instance.cpp.47280D4E92EC5D1F.idx new file mode 100755 index 0000000000000000000000000000000000000000..1fda3304ad1a5ceb010ef04490eb18c95373a366 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_kn_mn_instance.cpp.47280D4E92EC5D1F.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_nk_mn_instance.cpp.1DAF74A63A4FD2D6.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_nk_mn_instance.cpp.1DAF74A63A4FD2D6.idx new file mode 100755 index 0000000000000000000000000000000000000000..0a9211148c0d943a8d527c89cb75e958826ee91b Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_nk_mn_instance.cpp.1DAF74A63A4FD2D6.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp.CD8E0FEB6A298EEB.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp.CD8E0FEB6A298EEB.idx new file mode 100755 index 0000000000000000000000000000000000000000..6364304ca89adf5c7cb14ad9d1e118b92c05129f Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp.CD8E0FEB6A298EEB.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp.C65F333685492D7B.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp.C65F333685492D7B.idx new file mode 100755 index 0000000000000000000000000000000000000000..fbedd0aa52cdc203057cdae04fb81c35e6c81b7d Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp.C65F333685492D7B.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp.D0AB01FA41C26CA6.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp.D0AB01FA41C26CA6.idx new file mode 100755 index 0000000000000000000000000000000000000000..5e6a50f614a0749aee2e2d0ab05f1c5b135d7ab3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp.D0AB01FA41C26CA6.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp.43793441438DD042.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp.43793441438DD042.idx new file mode 100755 index 0000000000000000000000000000000000000000..5b2e2077d082157c13cd8637a7bda2a007a9bea2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp.43793441438DD042.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp.2F31492B9345223B.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp.2F31492B9345223B.idx new file mode 100755 index 0000000000000000000000000000000000000000..46c89fe115d502fe02c189d9494b056321ccf4cd Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp.2F31492B9345223B.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp.A7598EAA8CFFD492.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp.A7598EAA8CFFD492.idx new file mode 100755 index 0000000000000000000000000000000000000000..ac50a242f6e17ffdb56e42263b373fe6cff22fc7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp.A7598EAA8CFFD492.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp.FC712CF5F768A94B.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp.FC712CF5F768A94B.idx new file mode 100755 index 0000000000000000000000000000000000000000..3f7a4605f39c46d90c0c669f1aff421c927baafe Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp.FC712CF5F768A94B.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp.6FC4AF50AF38A41A.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp.6FC4AF50AF38A41A.idx new file mode 100755 index 0000000000000000000000000000000000000000..b12e081ff00d4b68219d09b34b1dda43ba236476 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp.6FC4AF50AF38A41A.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp.B22CC0B7C42EE2B4.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp.B22CC0B7C42EE2B4.idx new file mode 100755 index 0000000000000000000000000000000000000000..ba17cc6860b2fe54d7a1a1a5f94b3ae8f91bec5e Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp.B22CC0B7C42EE2B4.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp.E11E98EDBDF90D0B.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp.E11E98EDBDF90D0B.idx new file mode 100755 index 0000000000000000000000000000000000000000..a16ffc18d741f4c178909fa551bdab262d369ecf Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp.E11E98EDBDF90D0B.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_cshuffle.hpp.CE155CFA093515AD.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_cshuffle.hpp.CE155CFA093515AD.idx new file mode 100755 index 0000000000000000000000000000000000000000..c85d894aca9093fc94544fe4d0d6c20e5b0472fa Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_cshuffle.hpp.CE155CFA093515AD.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp.5121BAECA46B0C23.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp.5121BAECA46B0C23.idx new file mode 100755 index 0000000000000000000000000000000000000000..62b2281895d1caa4885c3d1461cd807135542fa0 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp.5121BAECA46B0C23.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.1597C09D10EC33F9.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.1597C09D10EC33F9.idx new file mode 100755 index 0000000000000000000000000000000000000000..d78992b5c1774c07de6f952e8a27260b983afbab Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.1597C09D10EC33F9.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp.FBCAD14B8CE81D3F.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp.FBCAD14B8CE81D3F.idx new file mode 100755 index 0000000000000000000000000000000000000000..264093ef7c96f231fd4ca8ebd054037cc372ff04 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp.FBCAD14B8CE81D3F.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F03BAFFDB1120A94.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F03BAFFDB1120A94.idx new file mode 100755 index 0000000000000000000000000000000000000000..2f7adb5724a0400c2781d3335f29347efc6edf34 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F03BAFFDB1120A94.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_irregular_instance.cpp.EADFE46F5C6C50DA.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_irregular_instance.cpp.EADFE46F5C6C50DA.idx new file mode 100755 index 0000000000000000000000000000000000000000..cd64544a59691a06d83087c8010b6f6a068d5aed Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_irregular_instance.cpp.EADFE46F5C6C50DA.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f8_f16_f16_mk_kn_mn_irregular_instance.cpp.5656416FBF150D8D.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f8_f16_f16_mk_kn_mn_irregular_instance.cpp.5656416FBF150D8D.idx new file mode 100755 index 0000000000000000000000000000000000000000..75b128e083a6cdbc5bc0c0d463ca67929d4baa56 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f8_f16_f16_mk_kn_mn_irregular_instance.cpp.5656416FBF150D8D.idx differ diff --git a/.cache/clangd/index/device_groupnorm_bwd_data_f32_instance.cpp.6CBCB642104CF767.idx b/.cache/clangd/index/device_groupnorm_bwd_data_f32_instance.cpp.6CBCB642104CF767.idx new file mode 100755 index 0000000000000000000000000000000000000000..fea89b1c7c5f9ceff47a0f485624855d7da4be5e Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_bwd_data_f32_instance.cpp.6CBCB642104CF767.idx differ diff --git a/.cache/clangd/index/device_groupnorm_bwd_gamma_beta_f32_instance.cpp.59A23F12AA5E3659.idx b/.cache/clangd/index/device_groupnorm_bwd_gamma_beta_f32_instance.cpp.59A23F12AA5E3659.idx new file mode 100755 index 0000000000000000000000000000000000000000..031556422218f670ef9d870ab172c98bfb855ca4 Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_bwd_gamma_beta_f32_instance.cpp.59A23F12AA5E3659.idx differ diff --git a/.cache/clangd/index/device_groupnorm_fwd_f16_instance.cpp.E5229B139F1A9B82.idx b/.cache/clangd/index/device_groupnorm_fwd_f16_instance.cpp.E5229B139F1A9B82.idx new file mode 100755 index 0000000000000000000000000000000000000000..3b94e1bd944d1f73a549d860fb20c8b680ccf759 Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_fwd_f16_instance.cpp.E5229B139F1A9B82.idx differ diff --git a/.cache/clangd/index/device_groupnorm_fwd_f32_instance.cpp.D9D1D3F585AA6CD7.idx b/.cache/clangd/index/device_groupnorm_fwd_f32_instance.cpp.D9D1D3F585AA6CD7.idx new file mode 100755 index 0000000000000000000000000000000000000000..8c15a7517138238bbab564c45f0eac6f413a6ef1 Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_fwd_f32_instance.cpp.D9D1D3F585AA6CD7.idx differ diff --git a/.cache/clangd/index/device_groupnorm_fwd_swish_f16_f32_f32_f16_instance.cpp.4F6E8B2271D69F1C.idx b/.cache/clangd/index/device_groupnorm_fwd_swish_f16_f32_f32_f16_instance.cpp.4F6E8B2271D69F1C.idx new file mode 100755 index 0000000000000000000000000000000000000000..c72d19680bb8e560af4a2e3780c800e20f6c93cb Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_fwd_swish_f16_f32_f32_f16_instance.cpp.4F6E8B2271D69F1C.idx differ diff --git a/.cache/clangd/index/device_groupnorm_fwd_swish_f16_instance.cpp.3151733ECD55CFAB.idx b/.cache/clangd/index/device_groupnorm_fwd_swish_f16_instance.cpp.3151733ECD55CFAB.idx new file mode 100755 index 0000000000000000000000000000000000000000..47f64cc623aec1072675293f099cff4a9de28dd4 Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_fwd_swish_f16_instance.cpp.3151733ECD55CFAB.idx differ diff --git a/.cache/clangd/index/device_groupnorm_fwd_swish_f32_instance.cpp.7C71C8C8383AE2D3.idx b/.cache/clangd/index/device_groupnorm_fwd_swish_f32_instance.cpp.7C71C8C8383AE2D3.idx new file mode 100755 index 0000000000000000000000000000000000000000..2c235eb5142fbc046f2bd5a4ea379821e66c2c75 Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_fwd_swish_f32_instance.cpp.7C71C8C8383AE2D3.idx differ diff --git a/.cache/clangd/index/device_image_to_column_gndhwc_3d_instance.cpp.B02146942667831D.idx b/.cache/clangd/index/device_image_to_column_gndhwc_3d_instance.cpp.B02146942667831D.idx new file mode 100755 index 0000000000000000000000000000000000000000..e841ec95cfbe787ec174e24215c71731d5f473ac Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_gndhwc_3d_instance.cpp.B02146942667831D.idx differ diff --git a/.cache/clangd/index/device_image_to_column_gnhwc_2d_instance.cpp.BAF41643A25FDB03.idx b/.cache/clangd/index/device_image_to_column_gnhwc_2d_instance.cpp.BAF41643A25FDB03.idx new file mode 100755 index 0000000000000000000000000000000000000000..88a61b4431e18a0f1dc7969c5d6954c718a2f7c0 Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_gnhwc_2d_instance.cpp.BAF41643A25FDB03.idx differ diff --git a/.cache/clangd/index/device_image_to_column_gnwc_1d_instance.cpp.3EF12405C93313FD.idx b/.cache/clangd/index/device_image_to_column_gnwc_1d_instance.cpp.3EF12405C93313FD.idx new file mode 100755 index 0000000000000000000000000000000000000000..7c4189bc59a5da66cd83b477faf1f5db6767ff46 Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_gnwc_1d_instance.cpp.3EF12405C93313FD.idx differ diff --git a/.cache/clangd/index/device_image_to_column_impl.hpp.C46D2CA10F5E5642.idx b/.cache/clangd/index/device_image_to_column_impl.hpp.C46D2CA10F5E5642.idx new file mode 100755 index 0000000000000000000000000000000000000000..3f4eb285b7cf157325f333da512b56b5591a9118 Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_impl.hpp.C46D2CA10F5E5642.idx differ diff --git a/.cache/clangd/index/device_image_to_column_instance.hpp.94D5335C0371893F.idx b/.cache/clangd/index/device_image_to_column_instance.hpp.94D5335C0371893F.idx new file mode 100755 index 0000000000000000000000000000000000000000..f0ad3d5929e02dadcc19ddaa97084890de5b383b Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_instance.hpp.94D5335C0371893F.idx differ diff --git a/.cache/clangd/index/device_image_to_column_ndhwgc_3d_instance.cpp.709987A54287EE7C.idx b/.cache/clangd/index/device_image_to_column_ndhwgc_3d_instance.cpp.709987A54287EE7C.idx new file mode 100755 index 0000000000000000000000000000000000000000..dcce595072a83b02812e34edf3abcbcdc7dec91e Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_ndhwgc_3d_instance.cpp.709987A54287EE7C.idx differ diff --git a/.cache/clangd/index/device_image_to_column_nhwgc_2d_instance.cpp.E16A024C295F734A.idx b/.cache/clangd/index/device_image_to_column_nhwgc_2d_instance.cpp.E16A024C295F734A.idx new file mode 100755 index 0000000000000000000000000000000000000000..57d39235b904b71862dc9ea6766f580ec116c95d Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_nhwgc_2d_instance.cpp.E16A024C295F734A.idx differ diff --git a/.cache/clangd/index/device_image_to_column_nwgc_1d_instance.cpp.D56D4C0735B48980.idx b/.cache/clangd/index/device_image_to_column_nwgc_1d_instance.cpp.D56D4C0735B48980.idx new file mode 100755 index 0000000000000000000000000000000000000000..adf441a213a6db4ab07db44f122d3ceb2a77a645 Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_nwgc_1d_instance.cpp.D56D4C0735B48980.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_bwd_data_f16_instance.cpp.7967A43F40615422.idx b/.cache/clangd/index/device_layernorm2d_bwd_data_f16_instance.cpp.7967A43F40615422.idx new file mode 100755 index 0000000000000000000000000000000000000000..dd36674a6656001d8db67cadfef10945f85d77de Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_bwd_data_f16_instance.cpp.7967A43F40615422.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_bwd_data_f32_instance.cpp.BCCFAE80AA603EB7.idx b/.cache/clangd/index/device_layernorm2d_bwd_data_f32_instance.cpp.BCCFAE80AA603EB7.idx new file mode 100755 index 0000000000000000000000000000000000000000..b34215b662bf216bc4c9a3c3b3081913c518f0dd Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_bwd_data_f32_instance.cpp.BCCFAE80AA603EB7.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f16_instance.cpp.756B8C951E2AA179.idx b/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f16_instance.cpp.756B8C951E2AA179.idx new file mode 100755 index 0000000000000000000000000000000000000000..c0a57a4b0cbfc970e3839d41deec03ed698d6cc8 Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f16_instance.cpp.756B8C951E2AA179.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f32_instance.cpp.4E9CDD5C09C49289.idx b/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f32_instance.cpp.4E9CDD5C09C49289.idx new file mode 100755 index 0000000000000000000000000000000000000000..0e6743a5b62591fe3a3e54c4691874b57dce5b8d Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f32_instance.cpp.4E9CDD5C09C49289.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_fwd_f16_instance.cpp.572C171AE3FAAC3B.idx b/.cache/clangd/index/device_layernorm2d_fwd_f16_instance.cpp.572C171AE3FAAC3B.idx new file mode 100755 index 0000000000000000000000000000000000000000..f2b90c03cefc287e576d56c1c60e56c34138e8f1 Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_fwd_f16_instance.cpp.572C171AE3FAAC3B.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_fwd_f32_instance.cpp.C34D1A2542CE24BA.idx b/.cache/clangd/index/device_layernorm2d_fwd_f32_instance.cpp.C34D1A2542CE24BA.idx new file mode 100755 index 0000000000000000000000000000000000000000..e959ff7210f0bb9d09b44c72788157d8a62680b2 Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_fwd_f32_instance.cpp.C34D1A2542CE24BA.idx differ diff --git a/.cache/clangd/index/device_layernorm4d_fwd_f16_instance.cpp.4D368D0AFAADE23F.idx b/.cache/clangd/index/device_layernorm4d_fwd_f16_instance.cpp.4D368D0AFAADE23F.idx new file mode 100755 index 0000000000000000000000000000000000000000..737cacba4620bb6b8571c153d18add907a5c5868 Binary files /dev/null and b/.cache/clangd/index/device_layernorm4d_fwd_f16_instance.cpp.4D368D0AFAADE23F.idx differ diff --git a/.cache/clangd/index/device_layernorm4d_fwd_f32_instance.cpp.82CA4512D672E7A2.idx b/.cache/clangd/index/device_layernorm4d_fwd_f32_instance.cpp.82CA4512D672E7A2.idx new file mode 100755 index 0000000000000000000000000000000000000000..be7f2f597be2b41f80c0f7f42418ec1e1c8cb9ae Binary files /dev/null and b/.cache/clangd/index/device_layernorm4d_fwd_f32_instance.cpp.82CA4512D672E7A2.idx differ diff --git a/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp.59F686652075C1F3.idx b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp.59F686652075C1F3.idx new file mode 100755 index 0000000000000000000000000000000000000000..93072285a19567fa72d2f4e9f773acc4bd9aef65 Binary files /dev/null and b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp.59F686652075C1F3.idx differ diff --git a/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f16_instance.cpp.075B2EC4AC466F2A.idx b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f16_instance.cpp.075B2EC4AC466F2A.idx new file mode 100755 index 0000000000000000000000000000000000000000..ade6870ab94c81397fffa6bf7a346ca232b5f91b Binary files /dev/null and b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f16_instance.cpp.075B2EC4AC466F2A.idx differ diff --git a/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f32_instance.cpp.2CC50FB6EE948F2B.idx b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f32_instance.cpp.2CC50FB6EE948F2B.idx new file mode 100755 index 0000000000000000000000000000000000000000..4516486081b190fe14b65e1692da4eea13e13990 Binary files /dev/null and b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f32_instance.cpp.2CC50FB6EE948F2B.idx differ diff --git a/.cache/clangd/index/device_max_pool_bwd.hpp.67E96E50742F2EC4.idx b/.cache/clangd/index/device_max_pool_bwd.hpp.67E96E50742F2EC4.idx new file mode 100755 index 0000000000000000000000000000000000000000..5e869eceeb8b6b03211d62c539f15d1e1a3f42b9 Binary files /dev/null and b/.cache/clangd/index/device_max_pool_bwd.hpp.67E96E50742F2EC4.idx differ diff --git a/.cache/clangd/index/device_max_pool_bwd_bf16_instance.cpp.142DD51C4CE34BA2.idx b/.cache/clangd/index/device_max_pool_bwd_bf16_instance.cpp.142DD51C4CE34BA2.idx new file mode 100755 index 0000000000000000000000000000000000000000..975b9b121b3fae626898c1ca91874df53e7746ef Binary files /dev/null and b/.cache/clangd/index/device_max_pool_bwd_bf16_instance.cpp.142DD51C4CE34BA2.idx differ diff --git a/.cache/clangd/index/device_max_pool_bwd_f16_instance.cpp.DCBDF5FDAE579133.idx b/.cache/clangd/index/device_max_pool_bwd_f16_instance.cpp.DCBDF5FDAE579133.idx new file mode 100755 index 0000000000000000000000000000000000000000..64d336ead6583e588687554c3b001816b478d591 Binary files /dev/null and b/.cache/clangd/index/device_max_pool_bwd_f16_instance.cpp.DCBDF5FDAE579133.idx differ diff --git a/.cache/clangd/index/device_max_pool_bwd_f32_instance.cpp.EA3D33FB1230F655.idx b/.cache/clangd/index/device_max_pool_bwd_f32_instance.cpp.EA3D33FB1230F655.idx new file mode 100755 index 0000000000000000000000000000000000000000..002db4dc9de642d868d97b182163ef612d0260fb Binary files /dev/null and b/.cache/clangd/index/device_max_pool_bwd_f32_instance.cpp.EA3D33FB1230F655.idx differ diff --git a/.cache/clangd/index/device_max_pool_bwd_impl.hpp.85737AA1C70FEFBE.idx b/.cache/clangd/index/device_max_pool_bwd_impl.hpp.85737AA1C70FEFBE.idx new file mode 100755 index 0000000000000000000000000000000000000000..7d0381ceed1c5fe0e5589d825ae0de57abb4e17f Binary files /dev/null and b/.cache/clangd/index/device_max_pool_bwd_impl.hpp.85737AA1C70FEFBE.idx differ diff --git a/.cache/clangd/index/device_memory.cpp.A744015D851F9812.idx b/.cache/clangd/index/device_memory.cpp.A744015D851F9812.idx new file mode 100755 index 0000000000000000000000000000000000000000..f7bbf88908e478259ffab0dd91d0ecb83fc6bf13 Binary files /dev/null and b/.cache/clangd/index/device_memory.cpp.A744015D851F9812.idx differ diff --git a/.cache/clangd/index/device_memory.hpp.C2F2D98A33C55529.idx b/.cache/clangd/index/device_memory.hpp.C2F2D98A33C55529.idx new file mode 100755 index 0000000000000000000000000000000000000000..3368d3eba49ad255b0abdc4c3b52427d020342af Binary files /dev/null and b/.cache/clangd/index/device_memory.hpp.C2F2D98A33C55529.idx differ diff --git a/.cache/clangd/index/device_multiple_reduce.hpp.188F02C49C64E773.idx b/.cache/clangd/index/device_multiple_reduce.hpp.188F02C49C64E773.idx new file mode 100755 index 0000000000000000000000000000000000000000..334a2253ffe79aa327963c3ba0de2279a1686d3a Binary files /dev/null and b/.cache/clangd/index/device_multiple_reduce.hpp.188F02C49C64E773.idx differ diff --git a/.cache/clangd/index/device_multiple_reduce_multiblock.hpp.BAAA7F55A7B0C61A.idx b/.cache/clangd/index/device_multiple_reduce_multiblock.hpp.BAAA7F55A7B0C61A.idx new file mode 100755 index 0000000000000000000000000000000000000000..69f1d2e19cdcbd98c1f6d7cca26edbc65e0e24c1 Binary files /dev/null and b/.cache/clangd/index/device_multiple_reduce_multiblock.hpp.BAAA7F55A7B0C61A.idx differ diff --git a/.cache/clangd/index/device_multiple_reduce_threadwise.hpp.956BE9BDF24D2435.idx b/.cache/clangd/index/device_multiple_reduce_threadwise.hpp.956BE9BDF24D2435.idx new file mode 100755 index 0000000000000000000000000000000000000000..6104a1590113f8fd59ed067be147e2c243dd0e65 Binary files /dev/null and b/.cache/clangd/index/device_multiple_reduce_threadwise.hpp.956BE9BDF24D2435.idx differ diff --git a/.cache/clangd/index/device_normalization_bwd_data.hpp.1BAA9B467AEFC28F.idx b/.cache/clangd/index/device_normalization_bwd_data.hpp.1BAA9B467AEFC28F.idx new file mode 100755 index 0000000000000000000000000000000000000000..1168b13c40bdb05f04429461c9e2cdb5a3f2218e Binary files /dev/null and b/.cache/clangd/index/device_normalization_bwd_data.hpp.1BAA9B467AEFC28F.idx differ diff --git a/.cache/clangd/index/device_normalization_bwd_data_impl.hpp.A627060CAA69A19F.idx b/.cache/clangd/index/device_normalization_bwd_data_impl.hpp.A627060CAA69A19F.idx new file mode 100755 index 0000000000000000000000000000000000000000..f4e31374887b6370d16182a695d1a8e7f0ff5a23 Binary files /dev/null and b/.cache/clangd/index/device_normalization_bwd_data_impl.hpp.A627060CAA69A19F.idx differ diff --git a/.cache/clangd/index/device_normalization_bwd_gamma_beta.hpp.78751175103676B4.idx b/.cache/clangd/index/device_normalization_bwd_gamma_beta.hpp.78751175103676B4.idx new file mode 100755 index 0000000000000000000000000000000000000000..9ae7a53839ada48944d522e595040fbfb7dc58af Binary files /dev/null and b/.cache/clangd/index/device_normalization_bwd_gamma_beta.hpp.78751175103676B4.idx differ diff --git a/.cache/clangd/index/device_normalization_bwd_gamma_beta_impl.hpp.A5BD375EAE722F5E.idx b/.cache/clangd/index/device_normalization_bwd_gamma_beta_impl.hpp.A5BD375EAE722F5E.idx new file mode 100755 index 0000000000000000000000000000000000000000..1e64a0d39b186ff362132f21dd7b1743964765ea Binary files /dev/null and b/.cache/clangd/index/device_normalization_bwd_gamma_beta_impl.hpp.A5BD375EAE722F5E.idx differ diff --git a/.cache/clangd/index/device_normalization_fwd.hpp.DBDBAF89813C337F.idx b/.cache/clangd/index/device_normalization_fwd.hpp.DBDBAF89813C337F.idx new file mode 100755 index 0000000000000000000000000000000000000000..d1f37b0c5860befa7793c1e4461209634e7c1972 Binary files /dev/null and b/.cache/clangd/index/device_normalization_fwd.hpp.DBDBAF89813C337F.idx differ diff --git a/.cache/clangd/index/device_normalization_fwd_impl.hpp.2930CBBD3A2BF4E9.idx b/.cache/clangd/index/device_normalization_fwd_impl.hpp.2930CBBD3A2BF4E9.idx new file mode 100755 index 0000000000000000000000000000000000000000..cba7f05b52d4e452d9ed10e85c8ff1da7e9f9e4e Binary files /dev/null and b/.cache/clangd/index/device_normalization_fwd_impl.hpp.2930CBBD3A2BF4E9.idx differ diff --git a/.cache/clangd/index/device_normalization_fwd_splitk_impl.hpp.6FD53387232480CC.idx b/.cache/clangd/index/device_normalization_fwd_splitk_impl.hpp.6FD53387232480CC.idx new file mode 100755 index 0000000000000000000000000000000000000000..5de48554c525179b3e87ddc02671317aa2a146cb Binary files /dev/null and b/.cache/clangd/index/device_normalization_fwd_splitk_impl.hpp.6FD53387232480CC.idx differ diff --git a/.cache/clangd/index/device_normalize_instance.cpp.FD40702354DAF5AA.idx b/.cache/clangd/index/device_normalize_instance.cpp.FD40702354DAF5AA.idx new file mode 100755 index 0000000000000000000000000000000000000000..90d0ef9eaa1f4f26c2c907363f27e2ce71b869bc Binary files /dev/null and b/.cache/clangd/index/device_normalize_instance.cpp.FD40702354DAF5AA.idx differ diff --git a/.cache/clangd/index/device_operation_instance_factory.hpp.115B69A1EE76F903.idx b/.cache/clangd/index/device_operation_instance_factory.hpp.115B69A1EE76F903.idx new file mode 100755 index 0000000000000000000000000000000000000000..88199850a1c4d019518e83b9fa496bb711dd0699 Binary files /dev/null and b/.cache/clangd/index/device_operation_instance_factory.hpp.115B69A1EE76F903.idx differ diff --git a/.cache/clangd/index/device_permute.hpp.47CCE84C97E4E9FC.idx b/.cache/clangd/index/device_permute.hpp.47CCE84C97E4E9FC.idx new file mode 100755 index 0000000000000000000000000000000000000000..37c5147034cfa02256427b79a51dd61bd736823b Binary files /dev/null and b/.cache/clangd/index/device_permute.hpp.47CCE84C97E4E9FC.idx differ diff --git a/.cache/clangd/index/device_permute_impl.hpp.BEE7C34D1958CA2F.idx b/.cache/clangd/index/device_permute_impl.hpp.BEE7C34D1958CA2F.idx new file mode 100755 index 0000000000000000000000000000000000000000..9e44096662d42981d4df7088797bab1d8193b5e2 Binary files /dev/null and b/.cache/clangd/index/device_permute_impl.hpp.BEE7C34D1958CA2F.idx differ diff --git a/.cache/clangd/index/device_permute_scale_1d_instances.cpp.7EB32958B0512D73.idx b/.cache/clangd/index/device_permute_scale_1d_instances.cpp.7EB32958B0512D73.idx new file mode 100755 index 0000000000000000000000000000000000000000..f245650045cb9e5d9dd5c4875bb1d6be0c272a80 Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_1d_instances.cpp.7EB32958B0512D73.idx differ diff --git a/.cache/clangd/index/device_permute_scale_2d_instances.cpp.2B80C1D2C455DED0.idx b/.cache/clangd/index/device_permute_scale_2d_instances.cpp.2B80C1D2C455DED0.idx new file mode 100755 index 0000000000000000000000000000000000000000..372d2d9f3a9c3f199806850ad088fbd63e44fdf0 Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_2d_instances.cpp.2B80C1D2C455DED0.idx differ diff --git a/.cache/clangd/index/device_permute_scale_3d_instances.cpp.07A2A609833FE4D0.idx b/.cache/clangd/index/device_permute_scale_3d_instances.cpp.07A2A609833FE4D0.idx new file mode 100755 index 0000000000000000000000000000000000000000..4a8461f376a96018f4b412728f7213382c5a3992 Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_3d_instances.cpp.07A2A609833FE4D0.idx differ diff --git a/.cache/clangd/index/device_permute_scale_4d_instances.cpp.18341306CF0F5EFD.idx b/.cache/clangd/index/device_permute_scale_4d_instances.cpp.18341306CF0F5EFD.idx new file mode 100755 index 0000000000000000000000000000000000000000..bd80d15cc7faad3e81b23fb3674182d97c0e8473 Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_4d_instances.cpp.18341306CF0F5EFD.idx differ diff --git a/.cache/clangd/index/device_permute_scale_5d_instances.cpp.81367B36B3C16096.idx b/.cache/clangd/index/device_permute_scale_5d_instances.cpp.81367B36B3C16096.idx new file mode 100755 index 0000000000000000000000000000000000000000..fd8b9d1bd5f6d103df5c1d0f8d5255815fdd9f0f Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_5d_instances.cpp.81367B36B3C16096.idx differ diff --git a/.cache/clangd/index/device_permute_scale_6d_instances.cpp.A19C7755BA5CCAF6.idx b/.cache/clangd/index/device_permute_scale_6d_instances.cpp.A19C7755BA5CCAF6.idx new file mode 100755 index 0000000000000000000000000000000000000000..965f8362faddc5250e985e6f0ff77a26716a9a64 Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_6d_instances.cpp.A19C7755BA5CCAF6.idx differ diff --git a/.cache/clangd/index/device_permute_scale_instances.hpp.6002ED9F3C6C6BA1.idx b/.cache/clangd/index/device_permute_scale_instances.hpp.6002ED9F3C6C6BA1.idx new file mode 100755 index 0000000000000000000000000000000000000000..c7a1340fb6065ae3484dab41e49ce2dd3dd90d4a Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_instances.hpp.6002ED9F3C6C6BA1.idx differ diff --git a/.cache/clangd/index/device_pool2d_fwd_nhwc_nhwc.hpp.8F42F3A58241D669.idx b/.cache/clangd/index/device_pool2d_fwd_nhwc_nhwc.hpp.8F42F3A58241D669.idx new file mode 100755 index 0000000000000000000000000000000000000000..416eadca25460fd7ca32350d265cbfb2711b11fc Binary files /dev/null and b/.cache/clangd/index/device_pool2d_fwd_nhwc_nhwc.hpp.8F42F3A58241D669.idx differ diff --git a/.cache/clangd/index/device_pool3d_fwd_ndhwc_ndhwc.hpp.821043598893E6B4.idx b/.cache/clangd/index/device_pool3d_fwd_ndhwc_ndhwc.hpp.821043598893E6B4.idx new file mode 100755 index 0000000000000000000000000000000000000000..cdefc863784555c9a47e36a569fec37760705cca Binary files /dev/null and b/.cache/clangd/index/device_pool3d_fwd_ndhwc_ndhwc.hpp.821043598893E6B4.idx differ diff --git a/.cache/clangd/index/device_pool_fwd.hpp.048422E56031C936.idx b/.cache/clangd/index/device_pool_fwd.hpp.048422E56031C936.idx new file mode 100755 index 0000000000000000000000000000000000000000..8f68847155416e94b2cac6858217c34bc72c3f0a Binary files /dev/null and b/.cache/clangd/index/device_pool_fwd.hpp.048422E56031C936.idx differ diff --git a/.cache/clangd/index/device_prop.hpp.51F1A892B5410B39.idx b/.cache/clangd/index/device_prop.hpp.51F1A892B5410B39.idx new file mode 100755 index 0000000000000000000000000000000000000000..bdae4b0f91741574208df65e3d7302852f21f55e Binary files /dev/null and b/.cache/clangd/index/device_prop.hpp.51F1A892B5410B39.idx differ diff --git a/.cache/clangd/index/device_put_element.hpp.50E2F2788BD7B2E7.idx b/.cache/clangd/index/device_put_element.hpp.50E2F2788BD7B2E7.idx new file mode 100755 index 0000000000000000000000000000000000000000..eeec123aeb1573006974fd4fc27349a9bc95b676 Binary files /dev/null and b/.cache/clangd/index/device_put_element.hpp.50E2F2788BD7B2E7.idx differ diff --git a/.cache/clangd/index/device_put_element_impl.hpp.CF1E04C6FC237429.idx b/.cache/clangd/index/device_put_element_impl.hpp.CF1E04C6FC237429.idx new file mode 100755 index 0000000000000000000000000000000000000000..dabf15e66aa4c8db14fa26b919d2626e4be382f0 Binary files /dev/null and b/.cache/clangd/index/device_put_element_impl.hpp.CF1E04C6FC237429.idx differ diff --git a/.cache/clangd/index/device_reduce.hpp.17BAD15A3D93B7FE.idx b/.cache/clangd/index/device_reduce.hpp.17BAD15A3D93B7FE.idx new file mode 100755 index 0000000000000000000000000000000000000000..9b56af31419da9894c1360ec610adee2adcabb6b Binary files /dev/null and b/.cache/clangd/index/device_reduce.hpp.17BAD15A3D93B7FE.idx differ diff --git a/.cache/clangd/index/device_reduce_common.hpp.CAE83A095739383D.idx b/.cache/clangd/index/device_reduce_common.hpp.CAE83A095739383D.idx new file mode 100755 index 0000000000000000000000000000000000000000..fc44ece3032709ad4f5a9e90f189e6436e4aa682 Binary files /dev/null and b/.cache/clangd/index/device_reduce_common.hpp.CAE83A095739383D.idx differ diff --git a/.cache/clangd/index/device_reduce_instance.hpp.15BDAE27A6CA281A.idx b/.cache/clangd/index/device_reduce_instance.hpp.15BDAE27A6CA281A.idx new file mode 100755 index 0000000000000000000000000000000000000000..f7b8f7d676c221b3ff7c57872f6661000ef8dfa0 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance.hpp.15BDAE27A6CA281A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise.hpp.C5FE82438250FD9A.idx b/.cache/clangd/index/device_reduce_instance_blockwise.hpp.C5FE82438250FD9A.idx new file mode 100755 index 0000000000000000000000000000000000000000..11638553188b369d4898e5965b76413e61b6a3c0 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise.hpp.C5FE82438250FD9A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.cpp.A1938EEEABE8B42A.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.cpp.A1938EEEABE8B42A.idx new file mode 100755 index 0000000000000000000000000000000000000000..766a32987ac7db0b5baa4257092b6167dc5cede2 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.cpp.A1938EEEABE8B42A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.hpp.20070C9325736FAE.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.hpp.20070C9325736FAE.idx new file mode 100755 index 0000000000000000000000000000000000000000..fd2943c97139c1798230e5e30a1a6ceec3047c3e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.hpp.20070C9325736FAE.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp.3CFF0C60C7399DEC.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp.3CFF0C60C7399DEC.idx new file mode 100755 index 0000000000000000000000000000000000000000..c11e67eac4607d3a25eeb9362ccc75f9f1d676d7 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp.3CFF0C60C7399DEC.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp.8C9F3AF79C3259FF.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp.8C9F3AF79C3259FF.idx new file mode 100755 index 0000000000000000000000000000000000000000..769ad2f0b5d0eefa84e96a1ff32a917c042e9eab Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp.8C9F3AF79C3259FF.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp.39D9A35BA78EDFCA.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp.39D9A35BA78EDFCA.idx new file mode 100755 index 0000000000000000000000000000000000000000..f6dee36658b1178086720fd70e5c7737f6aea5b3 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp.39D9A35BA78EDFCA.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp.D3804A89BDDBD68A.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp.D3804A89BDDBD68A.idx new file mode 100755 index 0000000000000000000000000000000000000000..87dc5e1005df22ba7c4e9cf1d7dc82ab3ca4e29f Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp.D3804A89BDDBD68A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.cpp.26EBD8A22938C32C.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.cpp.26EBD8A22938C32C.idx new file mode 100755 index 0000000000000000000000000000000000000000..94149a0e5e5ae43223fbedbd77c4177466c81bc1 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.cpp.26EBD8A22938C32C.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.hpp.EFFF67751118917D.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.hpp.EFFF67751118917D.idx new file mode 100755 index 0000000000000000000000000000000000000000..136e14fecd92a4422de45653cfd8de3173eb77ea Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.hpp.EFFF67751118917D.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.cpp.82123925B2E47DE5.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.cpp.82123925B2E47DE5.idx new file mode 100755 index 0000000000000000000000000000000000000000..8cb9a23cf80fc363ee8c112897db19af8ed2016a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.cpp.82123925B2E47DE5.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.hpp.1AFBE8401A0BB727.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.hpp.1AFBE8401A0BB727.idx new file mode 100755 index 0000000000000000000000000000000000000000..d23c3412ef4b5779bfc472861251b984fe49f9d0 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.hpp.1AFBE8401A0BB727.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp.4AE84C1155A0BD16.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp.4AE84C1155A0BD16.idx new file mode 100755 index 0000000000000000000000000000000000000000..2700fb7e29b1e4810a638d74f6205874d35c7edd Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp.4AE84C1155A0BD16.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp.E3EB62B3883D54D2.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp.E3EB62B3883D54D2.idx new file mode 100755 index 0000000000000000000000000000000000000000..724964577d9d77767c9fb509831fa3f6e2c6eb5f Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp.E3EB62B3883D54D2.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp.116108BDF1BABC19.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp.116108BDF1BABC19.idx new file mode 100755 index 0000000000000000000000000000000000000000..ee74385df8375b41bddd136039519be166dff78d Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp.116108BDF1BABC19.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp.9716CDC40BFB2514.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp.9716CDC40BFB2514.idx new file mode 100755 index 0000000000000000000000000000000000000000..1f83b89cfbedbb5b5e386ca3ca10e7c5f28c294a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp.9716CDC40BFB2514.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.cpp.F4D8C3EC38AC4A06.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.cpp.F4D8C3EC38AC4A06.idx new file mode 100755 index 0000000000000000000000000000000000000000..6b582a00cfd4a05da48c6de07b7d411f142b8768 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.cpp.F4D8C3EC38AC4A06.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.hpp.E4FC0C2AEABCC117.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.hpp.E4FC0C2AEABCC117.idx new file mode 100755 index 0000000000000000000000000000000000000000..dd84c05a4885986d158809909c029aa7b869540c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.hpp.E4FC0C2AEABCC117.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.cpp.8D16D62A0EBE3E6E.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.cpp.8D16D62A0EBE3E6E.idx new file mode 100755 index 0000000000000000000000000000000000000000..ee8bac987635a682cfabf531b097d807206af0e6 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.cpp.8D16D62A0EBE3E6E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.hpp.4404F6E1807E5BD4.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.hpp.4404F6E1807E5BD4.idx new file mode 100755 index 0000000000000000000000000000000000000000..03b13083da87661d997426688a2ba1a80cd98c7a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.hpp.4404F6E1807E5BD4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.cpp.6312AF648C08A121.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.cpp.6312AF648C08A121.idx new file mode 100755 index 0000000000000000000000000000000000000000..a401a5d92391659418693044e8b887e3eaf488fd Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.cpp.6312AF648C08A121.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.hpp.70C411CE7496D684.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.hpp.70C411CE7496D684.idx new file mode 100755 index 0000000000000000000000000000000000000000..4179663f06e97cb992a8f9827bb3adeae173eca4 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.hpp.70C411CE7496D684.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp.BB89842592147883.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp.BB89842592147883.idx new file mode 100755 index 0000000000000000000000000000000000000000..8a8042ffc36b84c877abb127677d3a581e5853db Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp.BB89842592147883.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp.08DE17CE7004DBDC.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp.08DE17CE7004DBDC.idx new file mode 100755 index 0000000000000000000000000000000000000000..61651ed42a3a6de18bbdba399df6d174dfa6b062 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp.08DE17CE7004DBDC.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp.15C86B2B21F27130.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp.15C86B2B21F27130.idx new file mode 100755 index 0000000000000000000000000000000000000000..bc4534e6624d755770ae717db5ac248bb0176034 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp.15C86B2B21F27130.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp.152CF392CC9366CD.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp.152CF392CC9366CD.idx new file mode 100755 index 0000000000000000000000000000000000000000..d004a4b8a42a161bbb03092bb0a0f22a3cc56cc3 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp.152CF392CC9366CD.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.cpp.07FADC8850641162.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.cpp.07FADC8850641162.idx new file mode 100755 index 0000000000000000000000000000000000000000..70fcd9199efb675dc15e638105d9ae8c89f37601 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.cpp.07FADC8850641162.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.hpp.B81990FE8C5D8509.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.hpp.B81990FE8C5D8509.idx new file mode 100755 index 0000000000000000000000000000000000000000..eaacfdf4587ac82d8a923d8bd88f6d1899016c41 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.hpp.B81990FE8C5D8509.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp.562409E747DBF8A8.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp.562409E747DBF8A8.idx new file mode 100755 index 0000000000000000000000000000000000000000..c6b48fa578ef51882b9f693fc27158c25bb2d498 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp.562409E747DBF8A8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp.DF4FA5048878CC52.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp.DF4FA5048878CC52.idx new file mode 100755 index 0000000000000000000000000000000000000000..b9aa906233c716da2e68a9fc47dbdedb00ecef22 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp.DF4FA5048878CC52.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp.F349F4E1672B2959.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp.F349F4E1672B2959.idx new file mode 100755 index 0000000000000000000000000000000000000000..abc24bb695f3882e7310355d0f5f5ff509df59ca Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp.F349F4E1672B2959.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp.1B257007DC8DF36B.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp.1B257007DC8DF36B.idx new file mode 100755 index 0000000000000000000000000000000000000000..a855882109160d2d9086cabf635518527a9e5ae7 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp.1B257007DC8DF36B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.cpp.58C6C0B25D770B51.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.cpp.58C6C0B25D770B51.idx new file mode 100755 index 0000000000000000000000000000000000000000..503ed8527916df65ed640656564228bfb6d2899f Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.cpp.58C6C0B25D770B51.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.hpp.9E23A16BC92C20DB.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.hpp.9E23A16BC92C20DB.idx new file mode 100755 index 0000000000000000000000000000000000000000..8774b23bacdd3b1d338fd6eecc1adcb8ddd05ff9 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.hpp.9E23A16BC92C20DB.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.cpp.0B95E75A805DE595.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.cpp.0B95E75A805DE595.idx new file mode 100755 index 0000000000000000000000000000000000000000..70a299bdc43531d891f7f6ab0f0c43f22a443b3d Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.cpp.0B95E75A805DE595.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.hpp.B0727F2EA98E1F00.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.hpp.B0727F2EA98E1F00.idx new file mode 100755 index 0000000000000000000000000000000000000000..51496e3be05758720b4d9591f333984b1e825b52 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.hpp.B0727F2EA98E1F00.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp.FA19AE81B0ABEEED.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp.FA19AE81B0ABEEED.idx new file mode 100755 index 0000000000000000000000000000000000000000..635672651ed9d212d4685e32ac243ed10fde979c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp.FA19AE81B0ABEEED.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp.7E4B9D2EA0AA55ED.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp.7E4B9D2EA0AA55ED.idx new file mode 100755 index 0000000000000000000000000000000000000000..e8c4241da32e4dec6dad693536358b2028293d7c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp.7E4B9D2EA0AA55ED.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.cpp.B429C5C7F2FA95D4.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.cpp.B429C5C7F2FA95D4.idx new file mode 100755 index 0000000000000000000000000000000000000000..6d8f0986c35cafca3bcca695c74c42acc641ba48 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.cpp.B429C5C7F2FA95D4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.hpp.5BB1B8BA3E60EF71.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.hpp.5BB1B8BA3E60EF71.idx new file mode 100755 index 0000000000000000000000000000000000000000..2bf92945dacdfe77788d58f94f750cb6faa2dd5e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.hpp.5BB1B8BA3E60EF71.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp.CFF7599DE0E79FBB.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp.CFF7599DE0E79FBB.idx new file mode 100755 index 0000000000000000000000000000000000000000..3c55d86c0b66dfc5837aa2d520f5f7255351aae2 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp.CFF7599DE0E79FBB.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp.46A8C8C2BE2BA19E.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp.46A8C8C2BE2BA19E.idx new file mode 100755 index 0000000000000000000000000000000000000000..8d08b133051bb48c69af5755b76e4851942a3cc9 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp.46A8C8C2BE2BA19E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp.384AFFA95058603D.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp.384AFFA95058603D.idx new file mode 100755 index 0000000000000000000000000000000000000000..ef168aa65171a42f2259acaf45cb1eda2c5c62aa Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp.384AFFA95058603D.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp.54BEF21DD1A84A95.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp.54BEF21DD1A84A95.idx new file mode 100755 index 0000000000000000000000000000000000000000..cfc3cd50fde7d372c176ae5fbb1de26226338c5c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp.54BEF21DD1A84A95.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.cpp.213C81B0FE4954D8.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.cpp.213C81B0FE4954D8.idx new file mode 100755 index 0000000000000000000000000000000000000000..8bd404ad4fe3a34941ddaa83ee0d66900a27dafb Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.cpp.213C81B0FE4954D8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.hpp.496DCAD6F6453198.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.hpp.496DCAD6F6453198.idx new file mode 100755 index 0000000000000000000000000000000000000000..5ce0b637cf256eb4633d69edc0fae43dde67919e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.hpp.496DCAD6F6453198.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp.13D835E23741A152.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp.13D835E23741A152.idx new file mode 100755 index 0000000000000000000000000000000000000000..5c08ed1331d6997f95aa9982ec05e7294109fab9 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp.13D835E23741A152.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp.2472648101515DFA.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp.2472648101515DFA.idx new file mode 100755 index 0000000000000000000000000000000000000000..7f1a31c7dffe588265e4a9cab454f4efd1b7a599 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp.2472648101515DFA.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp.11BCD7B99E3C6DB4.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp.11BCD7B99E3C6DB4.idx new file mode 100755 index 0000000000000000000000000000000000000000..8ee9cb2f225cccdbcfca2c23df550a690691c510 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp.11BCD7B99E3C6DB4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp.E5A6055D6ADB74AC.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp.E5A6055D6ADB74AC.idx new file mode 100755 index 0000000000000000000000000000000000000000..ce4c28b93633a54e30986fe62e455b3f23d9a3c3 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp.E5A6055D6ADB74AC.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.cpp.3646CB646CAA9902.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.cpp.3646CB646CAA9902.idx new file mode 100755 index 0000000000000000000000000000000000000000..9cd1a3023851451126c71d28ef14440e9246a549 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.cpp.3646CB646CAA9902.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.hpp.58F762D7856C2DD8.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.hpp.58F762D7856C2DD8.idx new file mode 100755 index 0000000000000000000000000000000000000000..bd26efe70cdb014c0483c396f8b2fe2861c99b06 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.hpp.58F762D7856C2DD8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.cpp.1EAEE023D4C34351.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.cpp.1EAEE023D4C34351.idx new file mode 100755 index 0000000000000000000000000000000000000000..f3ece7947de6a8acd96efd6361ad69571b81136a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.cpp.1EAEE023D4C34351.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.hpp.94217680DDE41BC8.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.hpp.94217680DDE41BC8.idx new file mode 100755 index 0000000000000000000000000000000000000000..13f5d3bc6b8b59b40b8a814f981b0812fcc04fbf Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.hpp.94217680DDE41BC8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp.5276760A8102F515.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp.5276760A8102F515.idx new file mode 100755 index 0000000000000000000000000000000000000000..868f24e5f3da06d36f2426c5aab0269ff191eba6 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp.5276760A8102F515.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp.52EC3DCBFC6F1222.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp.52EC3DCBFC6F1222.idx new file mode 100755 index 0000000000000000000000000000000000000000..0034e55c5ebb7968baaa9abebb038841d3470ca5 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp.52EC3DCBFC6F1222.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.cpp.BC429E8BCB688FC0.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.cpp.BC429E8BCB688FC0.idx new file mode 100755 index 0000000000000000000000000000000000000000..b46a273e0e0b22e341ab42380847d6864be56864 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.cpp.BC429E8BCB688FC0.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.hpp.29384EA1BE892314.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.hpp.29384EA1BE892314.idx new file mode 100755 index 0000000000000000000000000000000000000000..205092d6d92d598b93cc231f5f355bae6548a72f Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.hpp.29384EA1BE892314.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp.C4B64A596D6DF817.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp.C4B64A596D6DF817.idx new file mode 100755 index 0000000000000000000000000000000000000000..aa5be541b4f1fa6953b75d9e54b59e859ec843cc Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp.C4B64A596D6DF817.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp.615A6DAAC49A37A5.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp.615A6DAAC49A37A5.idx new file mode 100755 index 0000000000000000000000000000000000000000..d0a3c592eb20d7c7450414ab67597006b708f8f0 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp.615A6DAAC49A37A5.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp.9FC33F8197771FF0.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp.9FC33F8197771FF0.idx new file mode 100755 index 0000000000000000000000000000000000000000..5d1ecb69618fded34da9b108a9f95ca85a1bae9f Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp.9FC33F8197771FF0.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp.CF7CEDDC624CFFAF.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp.CF7CEDDC624CFFAF.idx new file mode 100755 index 0000000000000000000000000000000000000000..9e41d8b46c4cd625ed62243215bf5d52f7128042 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp.CF7CEDDC624CFFAF.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.cpp.20716445BE6CBC5D.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.cpp.20716445BE6CBC5D.idx new file mode 100755 index 0000000000000000000000000000000000000000..97119d91a68d377f08ea6c7c25f91d829125f6e2 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.cpp.20716445BE6CBC5D.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.hpp.A0110584C215CD9C.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.hpp.A0110584C215CD9C.idx new file mode 100755 index 0000000000000000000000000000000000000000..e9f0ca96d054951d80b881e0d5d3de93b30ae0f8 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.hpp.A0110584C215CD9C.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.cpp.D8BEF9441B9F4BC8.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.cpp.D8BEF9441B9F4BC8.idx new file mode 100755 index 0000000000000000000000000000000000000000..d9101b8dae05f7ae2e1266e5507ee0bcbb824021 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.cpp.D8BEF9441B9F4BC8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.hpp.8F13163D01C8AE8F.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.hpp.8F13163D01C8AE8F.idx new file mode 100755 index 0000000000000000000000000000000000000000..30a528ffa35cf83f16e1714010a703b147df67a1 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.hpp.8F13163D01C8AE8F.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_impl_common.hpp.F2201C19BAE9FFE8.idx b/.cache/clangd/index/device_reduce_instance_impl_common.hpp.F2201C19BAE9FFE8.idx new file mode 100755 index 0000000000000000000000000000000000000000..c3b24698cfe43289e475d85e1a2e935b6719fe4b Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_impl_common.hpp.F2201C19BAE9FFE8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add.hpp.F65D86E6C84EDA60.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add.hpp.F65D86E6C84EDA60.idx new file mode 100755 index 0000000000000000000000000000000000000000..a457c7c5b8a96923a4ba21c1c79a09e395fc6fae Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add.hpp.F65D86E6C84EDA60.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp.5E6D995E61ADA8BC.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp.5E6D995E61ADA8BC.idx new file mode 100755 index 0000000000000000000000000000000000000000..24fe3dcfc0343262d89188bfdc0734cd07019bd8 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp.5E6D995E61ADA8BC.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp.510427928F98A02B.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp.510427928F98A02B.idx new file mode 100755 index 0000000000000000000000000000000000000000..e1e0b187bceb6363df8dddbec032d32f3fc53dac Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp.510427928F98A02B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp.54085188C8447423.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp.54085188C8447423.idx new file mode 100755 index 0000000000000000000000000000000000000000..7695580feab1bbf91b84a8f45d3e1cb99ab50ebb Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp.54085188C8447423.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp.0ADD6329411B78B1.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp.0ADD6329411B78B1.idx new file mode 100755 index 0000000000000000000000000000000000000000..3a017998f92fe5d9079b029700c98feb5cdb2183 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp.0ADD6329411B78B1.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp.4393525F2F4E8DA5.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp.4393525F2F4E8DA5.idx new file mode 100755 index 0000000000000000000000000000000000000000..26590678654249a2db9f82c01833752523cf9307 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp.4393525F2F4E8DA5.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp.21E4CBB422BF8D82.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp.21E4CBB422BF8D82.idx new file mode 100755 index 0000000000000000000000000000000000000000..3149ea0aa424b85effaf07e00489c3868313d9c1 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp.21E4CBB422BF8D82.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp.4BD601E4D7DE370C.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp.4BD601E4D7DE370C.idx new file mode 100755 index 0000000000000000000000000000000000000000..95fe520793d28444b817e58930e0d7510884d5d5 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp.4BD601E4D7DE370C.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp.619ED16F95775BEA.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp.619ED16F95775BEA.idx new file mode 100755 index 0000000000000000000000000000000000000000..3fc5753205f0fb65114d8157b2d2ce85397e1490 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp.619ED16F95775BEA.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp.873C6FA8487AF602.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp.873C6FA8487AF602.idx new file mode 100755 index 0000000000000000000000000000000000000000..973d3cf99414a0407ad6519d405b88db513bd42e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp.873C6FA8487AF602.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp.DA5C10F93175C08B.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp.DA5C10F93175C08B.idx new file mode 100755 index 0000000000000000000000000000000000000000..ffba7fd45cfb6c1d5cff7175e8fe57f998061b98 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp.DA5C10F93175C08B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp.A62B76883CD2E814.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp.A62B76883CD2E814.idx new file mode 100755 index 0000000000000000000000000000000000000000..b92535801bc151dc087ac8deb744f5834fd97925 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp.A62B76883CD2E814.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp.82487B9F10609357.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp.82487B9F10609357.idx new file mode 100755 index 0000000000000000000000000000000000000000..06f0becd2223c42169b24ecdf49ca709b3464798 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp.82487B9F10609357.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp.979C0C60ED1AF7B3.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp.979C0C60ED1AF7B3.idx new file mode 100755 index 0000000000000000000000000000000000000000..24d95cc28b6444c01ebc48bdbe8d0825e828886a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp.979C0C60ED1AF7B3.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp.16AAA28D16A62BE9.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp.16AAA28D16A62BE9.idx new file mode 100755 index 0000000000000000000000000000000000000000..5c5c8da3ab6288be84f5c5bdc4f19a953079b3d6 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp.16AAA28D16A62BE9.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp.3DCB7674F6E295A2.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp.3DCB7674F6E295A2.idx new file mode 100755 index 0000000000000000000000000000000000000000..841c9cb2372de1f3b973848515372616bfe28e1a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp.3DCB7674F6E295A2.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp.3C8AE5F8E636FD38.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp.3C8AE5F8E636FD38.idx new file mode 100755 index 0000000000000000000000000000000000000000..e7ed5b597524a3915820cffb2a3de29bdc6f923e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp.3C8AE5F8E636FD38.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp.EC103046C3FCC222.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp.EC103046C3FCC222.idx new file mode 100755 index 0000000000000000000000000000000000000000..4101a3702af012a2bb4b9f686e402ffa9dc1f76a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp.EC103046C3FCC222.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp.EC5F0E8B3A5B73CB.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp.EC5F0E8B3A5B73CB.idx new file mode 100755 index 0000000000000000000000000000000000000000..bcee92fb44a4c6cdfb9fddd0190ee7d096929431 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp.EC5F0E8B3A5B73CB.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp.F69FD80D45178C79.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp.F69FD80D45178C79.idx new file mode 100755 index 0000000000000000000000000000000000000000..b248cc730c55ee849a50b9f2dae840ce5fb9fe82 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp.F69FD80D45178C79.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp.F6C8C49B5460F242.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp.F6C8C49B5460F242.idx new file mode 100755 index 0000000000000000000000000000000000000000..2f8bfb763897ec39edbabdb6d8cf4f62eccb7051 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp.F6C8C49B5460F242.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise.hpp.C398C657987C23EC.idx b/.cache/clangd/index/device_reduce_instance_threadwise.hpp.C398C657987C23EC.idx new file mode 100755 index 0000000000000000000000000000000000000000..00f79324f4069cec64f2b7348a5547ad949948e4 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise.hpp.C398C657987C23EC.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.cpp.7EFDD1FE32B6F3E3.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.cpp.7EFDD1FE32B6F3E3.idx new file mode 100755 index 0000000000000000000000000000000000000000..0ae241139573a507d0abccd82d4ac9d5568414cb Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.cpp.7EFDD1FE32B6F3E3.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.hpp.2D7F2A668C85C670.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.hpp.2D7F2A668C85C670.idx new file mode 100755 index 0000000000000000000000000000000000000000..ad4959b9df592cef35e8edc23cf7146f87e5b2ed Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.hpp.2D7F2A668C85C670.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp.DCF9F78C2573F20F.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp.DCF9F78C2573F20F.idx new file mode 100755 index 0000000000000000000000000000000000000000..42b90a2dc420591e77909bbb1b1c1f5e9b25e02c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp.DCF9F78C2573F20F.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp.E64E06F9E993015F.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp.E64E06F9E993015F.idx new file mode 100755 index 0000000000000000000000000000000000000000..e2016d379274ec1e8841b2342d73291967bcb9b4 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp.E64E06F9E993015F.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp.71BA7AE5BF315EB4.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp.71BA7AE5BF315EB4.idx new file mode 100755 index 0000000000000000000000000000000000000000..5369d9d439198dda6bb89834cca27f1849108817 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp.71BA7AE5BF315EB4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp.5814F92CA6CF0198.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp.5814F92CA6CF0198.idx new file mode 100755 index 0000000000000000000000000000000000000000..0101b167611352a4aa412943cdcd17254f75d4f3 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp.5814F92CA6CF0198.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.cpp.D9E364199F88743A.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.cpp.D9E364199F88743A.idx new file mode 100755 index 0000000000000000000000000000000000000000..32f80edd744719ab025577ae62d972aecd4ae2a2 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.cpp.D9E364199F88743A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.hpp.B8A4A04F978ABC79.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.hpp.B8A4A04F978ABC79.idx new file mode 100755 index 0000000000000000000000000000000000000000..afb82de642d93e6cb664d7da2622952c60b48277 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.hpp.B8A4A04F978ABC79.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.cpp.B60965F1C97196EF.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.cpp.B60965F1C97196EF.idx new file mode 100755 index 0000000000000000000000000000000000000000..694c861797c0d8ae21a1e97fb7c43b9ea1fda0a1 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.cpp.B60965F1C97196EF.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.hpp.97362179A977A149.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.hpp.97362179A977A149.idx new file mode 100755 index 0000000000000000000000000000000000000000..421d4c6defa7e2344b2d0e8e1c2ce3881cf26070 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.hpp.97362179A977A149.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp.C510348DB0E9EAF4.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp.C510348DB0E9EAF4.idx new file mode 100755 index 0000000000000000000000000000000000000000..8d4f2c8f676e9a557e322f39d260b87ca3ecbbbe Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp.C510348DB0E9EAF4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp.B82C759E6266A9D7.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp.B82C759E6266A9D7.idx new file mode 100755 index 0000000000000000000000000000000000000000..434021860bc8d3f12742c51f2789a45ec4251a64 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp.B82C759E6266A9D7.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp.6F6EC6F8F6066B26.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp.6F6EC6F8F6066B26.idx new file mode 100755 index 0000000000000000000000000000000000000000..b3221458b780c6c925b7d6fab1ae4baddaf23e98 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp.6F6EC6F8F6066B26.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp.2B3A1DBDB134572E.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp.2B3A1DBDB134572E.idx new file mode 100755 index 0000000000000000000000000000000000000000..9b90a662bea2069ee080b11bcf42aa351e067adf Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp.2B3A1DBDB134572E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.cpp.501586DF17B9409B.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.cpp.501586DF17B9409B.idx new file mode 100755 index 0000000000000000000000000000000000000000..5d714d2d3758bd7a305375a17f0ea8b8ba5c60be Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.cpp.501586DF17B9409B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.hpp.EA77360039638CEE.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.hpp.EA77360039638CEE.idx new file mode 100755 index 0000000000000000000000000000000000000000..0732c7d85383c93632632b9b21c4a860b5fd93df Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.hpp.EA77360039638CEE.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.cpp.F85FE38D0DCF8C44.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.cpp.F85FE38D0DCF8C44.idx new file mode 100755 index 0000000000000000000000000000000000000000..a2b08f506df5de5034832daa96bdb335a2169f3f Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.cpp.F85FE38D0DCF8C44.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.hpp.D976B4CDF936A58A.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.hpp.D976B4CDF936A58A.idx new file mode 100755 index 0000000000000000000000000000000000000000..7a4c330698dde5cf1ccef220d2117371d835ebcf Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.hpp.D976B4CDF936A58A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.cpp.630A4073E2B46FEA.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.cpp.630A4073E2B46FEA.idx new file mode 100755 index 0000000000000000000000000000000000000000..92ab9dd70500765a54bb844174be48f450a51454 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.cpp.630A4073E2B46FEA.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.hpp.7BFEE24C60F74EC6.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.hpp.7BFEE24C60F74EC6.idx new file mode 100755 index 0000000000000000000000000000000000000000..574fac2616f4d1e6ec5763b22fc33e9e4bae2088 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.hpp.7BFEE24C60F74EC6.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp.BB154BAE452FBECD.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp.BB154BAE452FBECD.idx new file mode 100755 index 0000000000000000000000000000000000000000..7b9af3c032b98c32f1e012b8640b547e4ebda316 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp.BB154BAE452FBECD.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp.442706BD47327B2E.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp.442706BD47327B2E.idx new file mode 100755 index 0000000000000000000000000000000000000000..7e2046bb1688cf157d43953d2bf3027d1ab9567d Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp.442706BD47327B2E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp.01982D48C410CB47.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp.01982D48C410CB47.idx new file mode 100755 index 0000000000000000000000000000000000000000..261169ee219fa00e7d474ad5a306d6b89d256da2 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp.01982D48C410CB47.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp.D90C9C6069808C78.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp.D90C9C6069808C78.idx new file mode 100755 index 0000000000000000000000000000000000000000..c66bbd1d4b79192ae5fc0597e507883e25ebde5c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp.D90C9C6069808C78.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.cpp.30899F9F46343B0E.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.cpp.30899F9F46343B0E.idx new file mode 100755 index 0000000000000000000000000000000000000000..167020fde34f36e1d178a516b32b14d4e7df6f75 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.cpp.30899F9F46343B0E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.hpp.3C1359A00C377B18.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.hpp.3C1359A00C377B18.idx new file mode 100755 index 0000000000000000000000000000000000000000..cb085c1db7b7335fa0e5fc13df265b3682da9170 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.hpp.3C1359A00C377B18.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp.F787647EC7B1DE3C.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp.F787647EC7B1DE3C.idx new file mode 100755 index 0000000000000000000000000000000000000000..6aabacf3732f54e8b55973c298464a3e0135d280 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp.F787647EC7B1DE3C.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp.2ABD3DA274EED7C0.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp.2ABD3DA274EED7C0.idx new file mode 100755 index 0000000000000000000000000000000000000000..f818c5f0ed02c3752da922226c5c41fc4efeae8d Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp.2ABD3DA274EED7C0.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp.0D0748D60784A612.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp.0D0748D60784A612.idx new file mode 100755 index 0000000000000000000000000000000000000000..662b3b08bdcdc30e2ff3c9e963e3befcad695dcd Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp.0D0748D60784A612.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp.F51F72472293693C.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp.F51F72472293693C.idx new file mode 100755 index 0000000000000000000000000000000000000000..5ac69dcedad0fa4ccafc8908ca31d6907d1c6c8f Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp.F51F72472293693C.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.cpp.35DDEC8F63798EAB.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.cpp.35DDEC8F63798EAB.idx new file mode 100755 index 0000000000000000000000000000000000000000..ac553b8d2d0a75cba263b82c4cb959df701c71ae Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.cpp.35DDEC8F63798EAB.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.hpp.4E3093BB6320EF1D.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.hpp.4E3093BB6320EF1D.idx new file mode 100755 index 0000000000000000000000000000000000000000..e78e942fb045b23ea43014d78f7b469770283c4e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.hpp.4E3093BB6320EF1D.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.cpp.1CE993853CF695D0.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.cpp.1CE993853CF695D0.idx new file mode 100755 index 0000000000000000000000000000000000000000..6e2a4c15157147207d56b74a6c0c8bd4a38a19d9 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.cpp.1CE993853CF695D0.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.hpp.448A5294C15B52AE.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.hpp.448A5294C15B52AE.idx new file mode 100755 index 0000000000000000000000000000000000000000..126304571130584b1617f16d4d618307fd5c8f47 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.hpp.448A5294C15B52AE.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp.F71A95723F67A7AA.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp.F71A95723F67A7AA.idx new file mode 100755 index 0000000000000000000000000000000000000000..b88cc6233419f2e3b09e95a2adffccace55ac0f6 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp.F71A95723F67A7AA.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp.1D44113CF201458F.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp.1D44113CF201458F.idx new file mode 100755 index 0000000000000000000000000000000000000000..33a9defdc944f8650a6d492b8316830a5861eb83 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp.1D44113CF201458F.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.cpp.DBE748926DFEDB8B.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.cpp.DBE748926DFEDB8B.idx new file mode 100755 index 0000000000000000000000000000000000000000..8621f2942752baf4fe16add8105b1fbb67ccee5b Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.cpp.DBE748926DFEDB8B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.hpp.55EEA55DB1346E7B.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.hpp.55EEA55DB1346E7B.idx new file mode 100755 index 0000000000000000000000000000000000000000..a7dbc3480eb4959ee371f7a02ad3103620b5fc99 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.hpp.55EEA55DB1346E7B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp.E1314404E465BA33.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp.E1314404E465BA33.idx new file mode 100755 index 0000000000000000000000000000000000000000..01f696025a8d50e9ecbe17386bca2040fe062e2e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp.E1314404E465BA33.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp.D9B461C00E68A892.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp.D9B461C00E68A892.idx new file mode 100755 index 0000000000000000000000000000000000000000..fe519a6aad01205ad91e23775551550b233578cb Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp.D9B461C00E68A892.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp.C0DCB6010E545A4E.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp.C0DCB6010E545A4E.idx new file mode 100755 index 0000000000000000000000000000000000000000..e5cff3fd009784ebc311f9eea7197b98d326f403 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp.C0DCB6010E545A4E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp.082457B5EE500DB0.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp.082457B5EE500DB0.idx new file mode 100755 index 0000000000000000000000000000000000000000..fdd7fad76fc5fdee8014684fddaf6df22a383f48 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp.082457B5EE500DB0.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.cpp.FF7D2E3E29E0477B.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.cpp.FF7D2E3E29E0477B.idx new file mode 100755 index 0000000000000000000000000000000000000000..794d9a1782d07b85bb219ec23a20058c32677b6a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.cpp.FF7D2E3E29E0477B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.hpp.774FB922ABF22CDD.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.hpp.774FB922ABF22CDD.idx new file mode 100755 index 0000000000000000000000000000000000000000..5f16201beac8847c0854235eea9c5139ff4cb6e3 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.hpp.774FB922ABF22CDD.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp.095F5D6B26D2F29A.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp.095F5D6B26D2F29A.idx new file mode 100755 index 0000000000000000000000000000000000000000..1b63a378e8878b8931211e10238938fe0556c9f9 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp.095F5D6B26D2F29A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp.5FCEC88A65C41974.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp.5FCEC88A65C41974.idx new file mode 100755 index 0000000000000000000000000000000000000000..479067d58cba32bb2311f53e1ded648016c3b166 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp.5FCEC88A65C41974.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp.BB5B12454481FF04.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp.BB5B12454481FF04.idx new file mode 100755 index 0000000000000000000000000000000000000000..a1e80fa3e1a3ae7b4b74cf3564c06012b032258b Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp.BB5B12454481FF04.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp.7B5AB5E79B93DFFB.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp.7B5AB5E79B93DFFB.idx new file mode 100755 index 0000000000000000000000000000000000000000..ae3487e2bbdb03f4c226df304bce5f9afbe45d73 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp.7B5AB5E79B93DFFB.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.cpp.BAF3E66A0EDF3C75.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.cpp.BAF3E66A0EDF3C75.idx new file mode 100755 index 0000000000000000000000000000000000000000..6c63edcd6e14f289248d7d5fa92ad7bd712e72b7 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.cpp.BAF3E66A0EDF3C75.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.hpp.390E8CBCF4E86ABF.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.hpp.390E8CBCF4E86ABF.idx new file mode 100755 index 0000000000000000000000000000000000000000..b5132a9be50456c9ce0a9085263c31b0d2d5d225 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.hpp.390E8CBCF4E86ABF.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.cpp.778D73C35496348B.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.cpp.778D73C35496348B.idx new file mode 100755 index 0000000000000000000000000000000000000000..6714bc73449fe6abb00099504af176f58dcaf1f0 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.cpp.778D73C35496348B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.hpp.2C7338E3132300C6.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.hpp.2C7338E3132300C6.idx new file mode 100755 index 0000000000000000000000000000000000000000..2307d370468e6ca5c1f2a5b12536ef5c165d3d27 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.hpp.2C7338E3132300C6.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp.59008E97BEF36435.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp.59008E97BEF36435.idx new file mode 100755 index 0000000000000000000000000000000000000000..e0a9743cf2fa00ff00eebde6bcf74fb66c4ec618 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp.59008E97BEF36435.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp.4329A8B0483B68A4.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp.4329A8B0483B68A4.idx new file mode 100755 index 0000000000000000000000000000000000000000..d60fd8507f012b99f23393f02351f79e8b236e41 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp.4329A8B0483B68A4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.cpp.509D27379633375F.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.cpp.509D27379633375F.idx new file mode 100755 index 0000000000000000000000000000000000000000..e26d1ea370581c908684f4c956bca140c399daee Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.cpp.509D27379633375F.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.hpp.7F575A4F4970CC92.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.hpp.7F575A4F4970CC92.idx new file mode 100755 index 0000000000000000000000000000000000000000..96731ede63f35a1ba63f0eb12a7af674bb49e871 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.hpp.7F575A4F4970CC92.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp.BBCEE5A5D01A00A5.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp.BBCEE5A5D01A00A5.idx new file mode 100755 index 0000000000000000000000000000000000000000..de1ee93dc10c407c9bba1071e642be945fea160c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp.BBCEE5A5D01A00A5.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp.FDD16FEA381DDE62.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp.FDD16FEA381DDE62.idx new file mode 100755 index 0000000000000000000000000000000000000000..ba1a25667ad2c40c83426eff545e96f751068f3f Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp.FDD16FEA381DDE62.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp.C6B6423A4D3C4156.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp.C6B6423A4D3C4156.idx new file mode 100755 index 0000000000000000000000000000000000000000..e87b3c98f73d8d64cde58f5dac71166c19dd6150 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp.C6B6423A4D3C4156.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp.F8ED464DDA5BD732.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp.F8ED464DDA5BD732.idx new file mode 100755 index 0000000000000000000000000000000000000000..408cd306300fd35cc88bfa05b89b7162a10f2e63 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp.F8ED464DDA5BD732.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.cpp.C1E70E7984041F16.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.cpp.C1E70E7984041F16.idx new file mode 100755 index 0000000000000000000000000000000000000000..9042dbabf4119f2a462340bf8e4513bdce1c01c2 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.cpp.C1E70E7984041F16.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.hpp.1F0CCA2641B90428.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.hpp.1F0CCA2641B90428.idx new file mode 100755 index 0000000000000000000000000000000000000000..f90de1f4f13d29eb5e554c3818126d85df4dd32b Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.hpp.1F0CCA2641B90428.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.cpp.05E7FDBEF4E2FEA1.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.cpp.05E7FDBEF4E2FEA1.idx new file mode 100755 index 0000000000000000000000000000000000000000..d640fb62c9cd3345aa5df9014a82c6bce11a71b7 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.cpp.05E7FDBEF4E2FEA1.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.hpp.078E8B6FD22E7DA4.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.hpp.078E8B6FD22E7DA4.idx new file mode 100755 index 0000000000000000000000000000000000000000..759a6f7d178b65f0b697f386cdf635d2c5a8490b Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.hpp.078E8B6FD22E7DA4.idx differ diff --git a/.cache/clangd/index/device_reduce_multiblock.hpp.C0B69E5EA2AC7CE7.idx b/.cache/clangd/index/device_reduce_multiblock.hpp.C0B69E5EA2AC7CE7.idx new file mode 100755 index 0000000000000000000000000000000000000000..e7dd55f72e311fbfccc807785c364b291874fbbb Binary files /dev/null and b/.cache/clangd/index/device_reduce_multiblock.hpp.C0B69E5EA2AC7CE7.idx differ diff --git a/.cache/clangd/index/device_reduce_threadwise.hpp.71DBCBAA668DE14B.idx b/.cache/clangd/index/device_reduce_threadwise.hpp.71DBCBAA668DE14B.idx new file mode 100755 index 0000000000000000000000000000000000000000..7ff39115d190d87936f9c3614e8668da24c048ca Binary files /dev/null and b/.cache/clangd/index/device_reduce_threadwise.hpp.71DBCBAA668DE14B.idx differ diff --git a/.cache/clangd/index/device_softmax.hpp.497B752134CE80EA.idx b/.cache/clangd/index/device_softmax.hpp.497B752134CE80EA.idx new file mode 100755 index 0000000000000000000000000000000000000000..2799d7a00b7d5cb5552361ba7edd9b3cfec63392 Binary files /dev/null and b/.cache/clangd/index/device_softmax.hpp.497B752134CE80EA.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.cpp.6DE6521393D18F5C.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.cpp.6DE6521393D18F5C.idx new file mode 100755 index 0000000000000000000000000000000000000000..2d715ac5c2eed19ccadb0d8b96fac9635a865995 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.cpp.6DE6521393D18F5C.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.hpp.B94CF584D85EABE4.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.hpp.B94CF584D85EABE4.idx new file mode 100755 index 0000000000000000000000000000000000000000..db413c0392b58e5f9d940e0f67f19b8ed8b044fc Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.hpp.B94CF584D85EABE4.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.cpp.6002EE43735BD2AD.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.cpp.6002EE43735BD2AD.idx new file mode 100755 index 0000000000000000000000000000000000000000..abc04700944b02f27ad99f93a30577f0af051169 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.cpp.6002EE43735BD2AD.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.hpp.453CC2F7EB0502D0.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.hpp.453CC2F7EB0502D0.idx new file mode 100755 index 0000000000000000000000000000000000000000..999ca4064e4555d4964d69eecab105dfe7ef7c97 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.hpp.453CC2F7EB0502D0.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.cpp.62DA65F400167AF3.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.cpp.62DA65F400167AF3.idx new file mode 100755 index 0000000000000000000000000000000000000000..36f8af3325a5c03d8879e92ddf9ddc4e26a0865e Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.cpp.62DA65F400167AF3.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.hpp.3F8E078976E83385.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.hpp.3F8E078976E83385.idx new file mode 100755 index 0000000000000000000000000000000000000000..9c8754b520921ffc882f323ab6c0a4858d829238 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.hpp.3F8E078976E83385.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.cpp.22BB990A62BE50C5.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.cpp.22BB990A62BE50C5.idx new file mode 100755 index 0000000000000000000000000000000000000000..f588676bd65f474895ce4713a432ca01c0baf962 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.cpp.22BB990A62BE50C5.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.hpp.7F7D38DA481813D7.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.hpp.7F7D38DA481813D7.idx new file mode 100755 index 0000000000000000000000000000000000000000..b01e47c715e75f1f3dac03742edeaf69f3cf2b79 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.hpp.7F7D38DA481813D7.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.cpp.AC6F1FB653149557.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.cpp.AC6F1FB653149557.idx new file mode 100755 index 0000000000000000000000000000000000000000..36fcb19c6edafc7d2c090ee4759406ecadc1a677 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.cpp.AC6F1FB653149557.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.hpp.44F668BC92D2FB11.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.hpp.44F668BC92D2FB11.idx new file mode 100755 index 0000000000000000000000000000000000000000..2b0c203b8b8099bf7da78d630c76a658ba96948e Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.hpp.44F668BC92D2FB11.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.cpp.2BDC7BD673407442.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.cpp.2BDC7BD673407442.idx new file mode 100755 index 0000000000000000000000000000000000000000..3887f062475efce3e35684866b25d0d6767845ff Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.cpp.2BDC7BD673407442.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.hpp.1A5543D9F44A4D1E.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.hpp.1A5543D9F44A4D1E.idx new file mode 100755 index 0000000000000000000000000000000000000000..a79cb2efcb55bee4447d22e1b0ef384ceda26693 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.hpp.1A5543D9F44A4D1E.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.cpp.E94DE85158A7059C.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.cpp.E94DE85158A7059C.idx new file mode 100755 index 0000000000000000000000000000000000000000..8d68770e0ad4848883e2635d1a6c714ca3fa5bcd Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.cpp.E94DE85158A7059C.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.hpp.DFA0CA7449260515.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.hpp.DFA0CA7449260515.idx new file mode 100755 index 0000000000000000000000000000000000000000..96e72e1a473508fa605b0f48256e1c863cdf17b7 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.hpp.DFA0CA7449260515.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_type.hpp.D6CB9472D3A0B40A.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_type.hpp.D6CB9472D3A0B40A.idx new file mode 100755 index 0000000000000000000000000000000000000000..e2dfe73499163e1b2af9ab097daf7971ffe2ce9b Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_type.hpp.D6CB9472D3A0B40A.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.cpp.7A9B6FF1C1DE5769.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.cpp.7A9B6FF1C1DE5769.idx new file mode 100755 index 0000000000000000000000000000000000000000..13642983a81caea3edde571f0c43c369bbfe0c3a Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.cpp.7A9B6FF1C1DE5769.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.hpp.901763453949395E.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.hpp.901763453949395E.idx new file mode 100755 index 0000000000000000000000000000000000000000..599375e3d9934de49b72db608a37cc78ea251d31 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.hpp.901763453949395E.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.cpp.B1019C659E1C387F.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.cpp.B1019C659E1C387F.idx new file mode 100755 index 0000000000000000000000000000000000000000..320b79c5e3f4b726555bf7a2e0874edf6c563add Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.cpp.B1019C659E1C387F.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.hpp.E7E5CB6E00C1ACF7.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.hpp.E7E5CB6E00C1ACF7.idx new file mode 100755 index 0000000000000000000000000000000000000000..737214420ac7bc771ba6785af1e8a18493d0bdec Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.hpp.E7E5CB6E00C1ACF7.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.cpp.D8971A21BC00D0FF.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.cpp.D8971A21BC00D0FF.idx new file mode 100755 index 0000000000000000000000000000000000000000..c5f011f2eec86bc2d2313772fd963a1bcf9ab882 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.cpp.D8971A21BC00D0FF.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.hpp.5132DD2F1F4E6659.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.hpp.5132DD2F1F4E6659.idx new file mode 100755 index 0000000000000000000000000000000000000000..9bd481892b41fe64f386bb5a37bb79238a7c219b Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.hpp.5132DD2F1F4E6659.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.cpp.70DAADE138E1FD1C.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.cpp.70DAADE138E1FD1C.idx new file mode 100755 index 0000000000000000000000000000000000000000..0f68683264140ef36bafde8bed8219f0275dccab Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.cpp.70DAADE138E1FD1C.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.hpp.D21E6E14C4987978.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.hpp.D21E6E14C4987978.idx new file mode 100755 index 0000000000000000000000000000000000000000..0b2c120f27d5569119f7a65f7850fb86d1eef493 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.hpp.D21E6E14C4987978.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.cpp.C8298BB69F4C257A.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.cpp.C8298BB69F4C257A.idx new file mode 100755 index 0000000000000000000000000000000000000000..7b38bfaee335ee353f58f9f4c2bb1217de72368c Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.cpp.C8298BB69F4C257A.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.hpp.E0AE02E34E90B0FC.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.hpp.E0AE02E34E90B0FC.idx new file mode 100755 index 0000000000000000000000000000000000000000..39c9db3eab2a87f361095c16b9d9898c08af3ce9 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.hpp.E0AE02E34E90B0FC.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.cpp.25E3FD604E588BD0.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.cpp.25E3FD604E588BD0.idx new file mode 100755 index 0000000000000000000000000000000000000000..8e8ad0775e1fee761d3aad40b44bec0a26a14849 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.cpp.25E3FD604E588BD0.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.hpp.33AFA8695A15063D.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.hpp.33AFA8695A15063D.idx new file mode 100755 index 0000000000000000000000000000000000000000..e295fdfaa31b8f3a10458f859f2a4658ceaabc6b Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.hpp.33AFA8695A15063D.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.cpp.86DA1C4D6D7DC8E7.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.cpp.86DA1C4D6D7DC8E7.idx new file mode 100755 index 0000000000000000000000000000000000000000..7e9f9bfd7c52bf7b32659fb6fb75dee2e6e208e3 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.cpp.86DA1C4D6D7DC8E7.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.hpp.35C1B364BF55F9FD.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.hpp.35C1B364BF55F9FD.idx new file mode 100755 index 0000000000000000000000000000000000000000..7d193ff70367f53bcd9be7fc3c676f2e397456d1 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.hpp.35C1B364BF55F9FD.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_type.hpp.7F3C1693216AB251.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_type.hpp.7F3C1693216AB251.idx new file mode 100755 index 0000000000000000000000000000000000000000..60f68634bb30cfbc85b2372b166ef5eab6ff3b2c Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_type.hpp.7F3C1693216AB251.idx differ diff --git a/.cache/clangd/index/device_softmax_impl.hpp.B9E8182EF8FC3442.idx b/.cache/clangd/index/device_softmax_impl.hpp.B9E8182EF8FC3442.idx new file mode 100755 index 0000000000000000000000000000000000000000..1b723e0dd8e430a81919aba720ad4efe02b0e999 Binary files /dev/null and b/.cache/clangd/index/device_softmax_impl.hpp.B9E8182EF8FC3442.idx differ diff --git a/.cache/clangd/index/device_softmax_instance.hpp.E665F0AD026D2FE2.idx b/.cache/clangd/index/device_softmax_instance.hpp.E665F0AD026D2FE2.idx new file mode 100755 index 0000000000000000000000000000000000000000..b22cfc111c7c7d1d183bb3e821576c2920fc94cf Binary files /dev/null and b/.cache/clangd/index/device_softmax_instance.hpp.E665F0AD026D2FE2.idx differ diff --git a/.cache/clangd/index/device_sparse_embeddings_forward_layernorm.hpp.469278C9F8399B0C.idx b/.cache/clangd/index/device_sparse_embeddings_forward_layernorm.hpp.469278C9F8399B0C.idx new file mode 100755 index 0000000000000000000000000000000000000000..90a8972502ae756c5548e91baa0ebe228ab9ccb8 Binary files /dev/null and b/.cache/clangd/index/device_sparse_embeddings_forward_layernorm.hpp.469278C9F8399B0C.idx differ diff --git a/.cache/clangd/index/device_splitk_contraction_multiple_d.hpp.B9F979311F947532.idx b/.cache/clangd/index/device_splitk_contraction_multiple_d.hpp.B9F979311F947532.idx new file mode 100755 index 0000000000000000000000000000000000000000..498ffaad97a47f8dc6a00f1f650833c6e6414887 Binary files /dev/null and b/.cache/clangd/index/device_splitk_contraction_multiple_d.hpp.B9F979311F947532.idx differ diff --git a/.cache/clangd/index/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp.43569A70DF974D2E.idx b/.cache/clangd/index/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp.43569A70DF974D2E.idx new file mode 100755 index 0000000000000000000000000000000000000000..db05bd26a7a51dcc37a2f702ad48d9a161709eff Binary files /dev/null and b/.cache/clangd/index/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp.43569A70DF974D2E.idx differ diff --git a/.cache/clangd/index/device_transpose_instance.hpp.9CF832E322CA1530.idx b/.cache/clangd/index/device_transpose_instance.hpp.9CF832E322CA1530.idx new file mode 100755 index 0000000000000000000000000000000000000000..0d8ef17f0653eb3e5e9c472ca0443cecd0abd69f Binary files /dev/null and b/.cache/clangd/index/device_transpose_instance.hpp.9CF832E322CA1530.idx differ diff --git a/.cache/clangd/index/device_transpose_instances_3d.cpp.5DC4547776B7C3A0.idx b/.cache/clangd/index/device_transpose_instances_3d.cpp.5DC4547776B7C3A0.idx new file mode 100755 index 0000000000000000000000000000000000000000..4483a90c0482bd9e231b8f747c598bc10fab3ca3 Binary files /dev/null and b/.cache/clangd/index/device_transpose_instances_3d.cpp.5DC4547776B7C3A0.idx differ diff --git a/.cache/clangd/index/dpp_gemm.hpp.CD386ABA025C1896.idx b/.cache/clangd/index/dpp_gemm.hpp.CD386ABA025C1896.idx new file mode 100755 index 0000000000000000000000000000000000000000..c000f5d7f1fc9c5caef0f5514cdc3a751ff8ed50 Binary files /dev/null and b/.cache/clangd/index/dpp_gemm.hpp.CD386ABA025C1896.idx differ diff --git a/.cache/clangd/index/dual_reduce_common.hpp.746AA107CF079C11.idx b/.cache/clangd/index/dual_reduce_common.hpp.746AA107CF079C11.idx new file mode 100755 index 0000000000000000000000000000000000000000..1fa3488651d58531a435691b37a02cdf47cf3ae7 Binary files /dev/null and b/.cache/clangd/index/dual_reduce_common.hpp.746AA107CF079C11.idx differ diff --git a/.cache/clangd/index/dual_reduce_multiblock.cpp.C5E150B4E9B1E7E8.idx b/.cache/clangd/index/dual_reduce_multiblock.cpp.C5E150B4E9B1E7E8.idx new file mode 100755 index 0000000000000000000000000000000000000000..40f1298f14ae9b80a15b264b97625155fa1a7e4a Binary files /dev/null and b/.cache/clangd/index/dual_reduce_multiblock.cpp.C5E150B4E9B1E7E8.idx differ diff --git a/.cache/clangd/index/dual_reduce_threadwise.cpp.7BA56606324C3914.idx b/.cache/clangd/index/dual_reduce_threadwise.cpp.7BA56606324C3914.idx new file mode 100755 index 0000000000000000000000000000000000000000..0e3349227d0f3e8a38734b984eedf59c1dc4cc72 Binary files /dev/null and b/.cache/clangd/index/dual_reduce_threadwise.cpp.7BA56606324C3914.idx differ diff --git a/.cache/clangd/index/dynamic_buffer.hpp.9B076D16FB6FDAA3.idx b/.cache/clangd/index/dynamic_buffer.hpp.9B076D16FB6FDAA3.idx new file mode 100755 index 0000000000000000000000000000000000000000..248bc9f465e9d992e1e2debbdf9e21ed75733106 Binary files /dev/null and b/.cache/clangd/index/dynamic_buffer.hpp.9B076D16FB6FDAA3.idx differ diff --git a/.cache/clangd/index/element_wise_operation.hpp.0715A40D7E0DC380.idx b/.cache/clangd/index/element_wise_operation.hpp.0715A40D7E0DC380.idx new file mode 100755 index 0000000000000000000000000000000000000000..21141e524593a6b8f711bec4cc3e201bda9ea30e Binary files /dev/null and b/.cache/clangd/index/element_wise_operation.hpp.0715A40D7E0DC380.idx differ diff --git a/.cache/clangd/index/elementwise_add_1d.cpp.CD0512697217A13B.idx b/.cache/clangd/index/elementwise_add_1d.cpp.CD0512697217A13B.idx new file mode 100755 index 0000000000000000000000000000000000000000..08d7ee996a273833614a112815957d5e1cac961d Binary files /dev/null and b/.cache/clangd/index/elementwise_add_1d.cpp.CD0512697217A13B.idx differ diff --git a/.cache/clangd/index/elementwise_add_4d.cpp.6D6ECABFC0DFD709.idx b/.cache/clangd/index/elementwise_add_4d.cpp.6D6ECABFC0DFD709.idx new file mode 100755 index 0000000000000000000000000000000000000000..59c76f0ad357d82e5ee11199c19180690a73f9fa Binary files /dev/null and b/.cache/clangd/index/elementwise_add_4d.cpp.6D6ECABFC0DFD709.idx differ diff --git a/.cache/clangd/index/elementwise_layernorm_blockwise.cpp.B1B3F8F277F6314A.idx b/.cache/clangd/index/elementwise_layernorm_blockwise.cpp.B1B3F8F277F6314A.idx new file mode 100755 index 0000000000000000000000000000000000000000..7c44c49b1d4ba1cb6b3f827dc9dd0f6326247769 Binary files /dev/null and b/.cache/clangd/index/elementwise_layernorm_blockwise.cpp.B1B3F8F277F6314A.idx differ diff --git a/.cache/clangd/index/elementwise_normalization.hpp.B86CF9747F7CBAA9.idx b/.cache/clangd/index/elementwise_normalization.hpp.B86CF9747F7CBAA9.idx new file mode 100755 index 0000000000000000000000000000000000000000..7f03b779edb8cd08942eb976092665070fa4b871 Binary files /dev/null and b/.cache/clangd/index/elementwise_normalization.hpp.B86CF9747F7CBAA9.idx differ diff --git a/.cache/clangd/index/elementwise_permute.cpp.780590C9EF3E30CE.idx b/.cache/clangd/index/elementwise_permute.cpp.780590C9EF3E30CE.idx new file mode 100755 index 0000000000000000000000000000000000000000..bb27fbc779d6f883f08e63c1f40c19f66d5983db Binary files /dev/null and b/.cache/clangd/index/elementwise_permute.cpp.780590C9EF3E30CE.idx differ diff --git a/.cache/clangd/index/elementwise_permute_3d.cpp.2AC2BFBCE3237215.idx b/.cache/clangd/index/elementwise_permute_3d.cpp.2AC2BFBCE3237215.idx new file mode 100755 index 0000000000000000000000000000000000000000..6c32cf5ca34f3246e125c994549ca9d33625ea8a Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_3d.cpp.2AC2BFBCE3237215.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp16.cpp.267AD0382A416744.idx b/.cache/clangd/index/elementwise_permute_4D_fp16.cpp.267AD0382A416744.idx new file mode 100755 index 0000000000000000000000000000000000000000..26277d5cfc1c9e49f257f7ca6c0e9ab60dac74be Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp16.cpp.267AD0382A416744.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp16_2d.cpp.73F9F8FFA71E623F.idx b/.cache/clangd/index/elementwise_permute_4D_fp16_2d.cpp.73F9F8FFA71E623F.idx new file mode 100755 index 0000000000000000000000000000000000000000..9c5e4e7d9c29d2faaea391b2dfcc2d02e3388886 Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp16_2d.cpp.73F9F8FFA71E623F.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp16_col.cpp.427D8FEA072CC243.idx b/.cache/clangd/index/elementwise_permute_4D_fp16_col.cpp.427D8FEA072CC243.idx new file mode 100755 index 0000000000000000000000000000000000000000..e489d36c0d2cf308284bafa61c9c72a66229288c Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp16_col.cpp.427D8FEA072CC243.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp16_row.cpp.70A9A9EF321166D7.idx b/.cache/clangd/index/elementwise_permute_4D_fp16_row.cpp.70A9A9EF321166D7.idx new file mode 100755 index 0000000000000000000000000000000000000000..8641b5b73030408ec5a00943b20fa046e7c02552 Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp16_row.cpp.70A9A9EF321166D7.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp32_col.cpp.645D32DD67217BF3.idx b/.cache/clangd/index/elementwise_permute_4D_fp32_col.cpp.645D32DD67217BF3.idx new file mode 100755 index 0000000000000000000000000000000000000000..9d05a298ddb1fc4de2b2512f27bb3205033ced9d Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp32_col.cpp.645D32DD67217BF3.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp32_row.cpp.51034D29A216280F.idx b/.cache/clangd/index/elementwise_permute_4D_fp32_row.cpp.51034D29A216280F.idx new file mode 100755 index 0000000000000000000000000000000000000000..73954ef1c93c1f688635207a3e21c9bc5174982c Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp32_row.cpp.51034D29A216280F.idx differ diff --git a/.cache/clangd/index/enable_if.hpp.10016CB3379BC2AC.idx b/.cache/clangd/index/enable_if.hpp.10016CB3379BC2AC.idx new file mode 100755 index 0000000000000000000000000000000000000000..882fbc2bad6afa79fc3861b7ba00ed418cd38518 Binary files /dev/null and b/.cache/clangd/index/enable_if.hpp.10016CB3379BC2AC.idx differ diff --git a/.cache/clangd/index/f8_utils.hpp.AB5D9F32110A912A.idx b/.cache/clangd/index/f8_utils.hpp.AB5D9F32110A912A.idx new file mode 100755 index 0000000000000000000000000000000000000000..3210bc0553b31e5f8154705f2620d7476fc1ab0d Binary files /dev/null and b/.cache/clangd/index/f8_utils.hpp.AB5D9F32110A912A.idx differ diff --git a/.cache/clangd/index/fill.hpp.A302158AF84698F8.idx b/.cache/clangd/index/fill.hpp.A302158AF84698F8.idx new file mode 100755 index 0000000000000000000000000000000000000000..140a1fc80d0ac5b7147630851fb1590557b6cb94 Binary files /dev/null and b/.cache/clangd/index/fill.hpp.A302158AF84698F8.idx differ diff --git a/.cache/clangd/index/functional.hpp.AB02FF3D06BB8256.idx b/.cache/clangd/index/functional.hpp.AB02FF3D06BB8256.idx new file mode 100755 index 0000000000000000000000000000000000000000..eec0504ad873c511f2a094a91bb994e00fa48bf6 Binary files /dev/null and b/.cache/clangd/index/functional.hpp.AB02FF3D06BB8256.idx differ diff --git a/.cache/clangd/index/functional2.hpp.4855B705916A8A54.idx b/.cache/clangd/index/functional2.hpp.4855B705916A8A54.idx new file mode 100755 index 0000000000000000000000000000000000000000..530c9333b0fbf06932c41976d9652c130ee0c5ad Binary files /dev/null and b/.cache/clangd/index/functional2.hpp.4855B705916A8A54.idx differ diff --git a/.cache/clangd/index/functional3.hpp.730E80629C119B9D.idx b/.cache/clangd/index/functional3.hpp.730E80629C119B9D.idx new file mode 100755 index 0000000000000000000000000000000000000000..cce5b119a5f42787dbb37b04ad7d73362bf6239b Binary files /dev/null and b/.cache/clangd/index/functional3.hpp.730E80629C119B9D.idx differ diff --git a/.cache/clangd/index/functional4.hpp.579A0D5EFC68EB03.idx b/.cache/clangd/index/functional4.hpp.579A0D5EFC68EB03.idx new file mode 100755 index 0000000000000000000000000000000000000000..471e2ef40dba41433ec05837e32ec246c75319fc Binary files /dev/null and b/.cache/clangd/index/functional4.hpp.579A0D5EFC68EB03.idx differ diff --git a/.cache/clangd/index/gemm.hpp.344713A6C512A32C.idx b/.cache/clangd/index/gemm.hpp.344713A6C512A32C.idx new file mode 100755 index 0000000000000000000000000000000000000000..d4949b16bf2b75684bffd22d3c8e55f2b3fdb338 Binary files /dev/null and b/.cache/clangd/index/gemm.hpp.344713A6C512A32C.idx differ diff --git a/.cache/clangd/index/gemm.hpp.C45C6ACD321C3642.idx b/.cache/clangd/index/gemm.hpp.C45C6ACD321C3642.idx new file mode 100755 index 0000000000000000000000000000000000000000..95e0d066f1ee4a682ba980611893e4b84aba9271 Binary files /dev/null and b/.cache/clangd/index/gemm.hpp.C45C6ACD321C3642.idx differ diff --git a/.cache/clangd/index/gemm_add.hpp.0A05FF4E11D910E0.idx b/.cache/clangd/index/gemm_add.hpp.0A05FF4E11D910E0.idx new file mode 100755 index 0000000000000000000000000000000000000000..92a4be2c7cee2a8eac2889e0f42012dfa62bca56 Binary files /dev/null and b/.cache/clangd/index/gemm_add.hpp.0A05FF4E11D910E0.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu.hpp.C1858AFB6E6189D2.idx b/.cache/clangd/index/gemm_add_add_fastgelu.hpp.C1858AFB6E6189D2.idx new file mode 100755 index 0000000000000000000000000000000000000000..3cd773bc4ac72b6d124a6eab1e86b1424f5098ef Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu.hpp.C1858AFB6E6189D2.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu_xdl_bf16.cpp.D697A303BBCD00E1.idx b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_bf16.cpp.D697A303BBCD00E1.idx new file mode 100755 index 0000000000000000000000000000000000000000..1c7a5191101e4b522788e54e149451d448e102c6 Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_bf16.cpp.D697A303BBCD00E1.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp16.cpp.BE72E2A90F751444.idx b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp16.cpp.BE72E2A90F751444.idx new file mode 100755 index 0000000000000000000000000000000000000000..fa5e345e3ee6e0795d38528fd61b5e1d0d03dd75 Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp16.cpp.BE72E2A90F751444.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp32.cpp.1D688E059DA5C6D3.idx b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp32.cpp.1D688E059DA5C6D3.idx new file mode 100755 index 0000000000000000000000000000000000000000..c1d7f6846bdda0faf595a15d923e5c5d40137b7e Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp32.cpp.1D688E059DA5C6D3.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu_xdl_int8.cpp.7C822BDA45FD8B5C.idx b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_int8.cpp.7C822BDA45FD8B5C.idx new file mode 100755 index 0000000000000000000000000000000000000000..6ac8ee6e62621b5d13ef810009d60fcce7b2d083 Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_int8.cpp.7C822BDA45FD8B5C.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu_xdl_lds_direct_load_fp32.cpp.DFA0EFAE6F444F2C.idx b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_lds_direct_load_fp32.cpp.DFA0EFAE6F444F2C.idx new file mode 100755 index 0000000000000000000000000000000000000000..2193d2333a475509e78035a1305ce83e465d4bfe Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_lds_direct_load_fp32.cpp.DFA0EFAE6F444F2C.idx differ diff --git a/.cache/clangd/index/gemm_add_add_mean_meansquare_xdl_fp16.cpp.784D28B0B387AEB3.idx b/.cache/clangd/index/gemm_add_add_mean_meansquare_xdl_fp16.cpp.784D28B0B387AEB3.idx new file mode 100755 index 0000000000000000000000000000000000000000..e11eb70ed7e87be3b51950fd6892d927ba687a7f Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_mean_meansquare_xdl_fp16.cpp.784D28B0B387AEB3.idx differ diff --git a/.cache/clangd/index/gemm_add_addsquare_xdl_int8.cpp.C81A82E4BF27D67D.idx b/.cache/clangd/index/gemm_add_addsquare_xdl_int8.cpp.C81A82E4BF27D67D.idx new file mode 100755 index 0000000000000000000000000000000000000000..ce078c9e23e4b82b1f298f88b3555e9d022bb825 Binary files /dev/null and b/.cache/clangd/index/gemm_add_addsquare_xdl_int8.cpp.C81A82E4BF27D67D.idx differ diff --git a/.cache/clangd/index/gemm_add_fastgelu.hpp.7F025B27BB774C23.idx b/.cache/clangd/index/gemm_add_fastgelu.hpp.7F025B27BB774C23.idx new file mode 100755 index 0000000000000000000000000000000000000000..7b588fbed10591395d9871270ac663af6c2d74ae Binary files /dev/null and b/.cache/clangd/index/gemm_add_fastgelu.hpp.7F025B27BB774C23.idx differ diff --git a/.cache/clangd/index/gemm_add_multiply.hpp.A076E88A74402B66.idx b/.cache/clangd/index/gemm_add_multiply.hpp.A076E88A74402B66.idx new file mode 100755 index 0000000000000000000000000000000000000000..5f659ccf2b6e20b52311a6521427c02db24e7e45 Binary files /dev/null and b/.cache/clangd/index/gemm_add_multiply.hpp.A076E88A74402B66.idx differ diff --git a/.cache/clangd/index/gemm_add_multiply_xdl_fp16.cpp.2190407D709E315C.idx b/.cache/clangd/index/gemm_add_multiply_xdl_fp16.cpp.2190407D709E315C.idx new file mode 100755 index 0000000000000000000000000000000000000000..7129663e581ceec4354ad1950aac8867473d8427 Binary files /dev/null and b/.cache/clangd/index/gemm_add_multiply_xdl_fp16.cpp.2190407D709E315C.idx differ diff --git a/.cache/clangd/index/gemm_add_relu.hpp.2FD450825E8EDD32.idx b/.cache/clangd/index/gemm_add_relu.hpp.2FD450825E8EDD32.idx new file mode 100755 index 0000000000000000000000000000000000000000..2561e64e2b88603c771367fe3adaf53c392a798c Binary files /dev/null and b/.cache/clangd/index/gemm_add_relu.hpp.2FD450825E8EDD32.idx differ diff --git a/.cache/clangd/index/gemm_add_relu_add_layernorm.hpp.2503A6C1813DEBB3.idx b/.cache/clangd/index/gemm_add_relu_add_layernorm.hpp.2503A6C1813DEBB3.idx new file mode 100755 index 0000000000000000000000000000000000000000..76a4369d51528ed287c1e36374bd86410de58596 Binary files /dev/null and b/.cache/clangd/index/gemm_add_relu_add_layernorm.hpp.2503A6C1813DEBB3.idx differ diff --git a/.cache/clangd/index/gemm_add_silu.hpp.8120675903700DB9.idx b/.cache/clangd/index/gemm_add_silu.hpp.8120675903700DB9.idx new file mode 100755 index 0000000000000000000000000000000000000000..9c6538cc9ab3298a5610931f7318629853394863 Binary files /dev/null and b/.cache/clangd/index/gemm_add_silu.hpp.8120675903700DB9.idx differ diff --git a/.cache/clangd/index/gemm_bf16.cpp.B8DB90A206756A9E.idx b/.cache/clangd/index/gemm_bf16.cpp.B8DB90A206756A9E.idx new file mode 100755 index 0000000000000000000000000000000000000000..1de2a6c84c346bb4a21c4cefbd1f895ac223a98d Binary files /dev/null and b/.cache/clangd/index/gemm_bf16.cpp.B8DB90A206756A9E.idx differ diff --git a/.cache/clangd/index/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp.ACF8DA35700D8D8F.idx b/.cache/clangd/index/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp.ACF8DA35700D8D8F.idx new file mode 100755 index 0000000000000000000000000000000000000000..38f07a5b480675be0f80a3194012a2c05c7eacb4 Binary files /dev/null and b/.cache/clangd/index/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp.ACF8DA35700D8D8F.idx differ diff --git a/.cache/clangd/index/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp.83BB18F6B66F2289.idx b/.cache/clangd/index/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp.83BB18F6B66F2289.idx new file mode 100755 index 0000000000000000000000000000000000000000..becf34f3ae7380a1ec5561372912054743eb9fe9 Binary files /dev/null and b/.cache/clangd/index/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp.83BB18F6B66F2289.idx differ diff --git a/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp.6BD99D80213AD3B0.idx b/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp.6BD99D80213AD3B0.idx new file mode 100755 index 0000000000000000000000000000000000000000..27e0077ede9a8d9746e3685614d6d6ccdf956506 Binary files /dev/null and b/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp.6BD99D80213AD3B0.idx differ diff --git a/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp.AC32388E298FFA30.idx b/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp.AC32388E298FFA30.idx new file mode 100755 index 0000000000000000000000000000000000000000..123bb480382606005d78065014426e86110e882d Binary files /dev/null and b/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp.AC32388E298FFA30.idx differ diff --git a/.cache/clangd/index/gemm_bias_relu_xdl_fp16.cpp.74D4B1F74A45E199.idx b/.cache/clangd/index/gemm_bias_relu_xdl_fp16.cpp.74D4B1F74A45E199.idx new file mode 100755 index 0000000000000000000000000000000000000000..276fd764f70ad7fb3188c203b6c2065fc6d959bb Binary files /dev/null and b/.cache/clangd/index/gemm_bias_relu_xdl_fp16.cpp.74D4B1F74A45E199.idx differ diff --git a/.cache/clangd/index/gemm_bias_softmax_gemm_permute.cpp.A515B9EF146522D4.idx b/.cache/clangd/index/gemm_bias_softmax_gemm_permute.cpp.A515B9EF146522D4.idx new file mode 100755 index 0000000000000000000000000000000000000000..ec5340700df455db9e6530a6a1ea76e9a3f02c45 Binary files /dev/null and b/.cache/clangd/index/gemm_bias_softmax_gemm_permute.cpp.A515B9EF146522D4.idx differ diff --git a/.cache/clangd/index/gemm_bilinear.hpp.092CAB87B366F08F.idx b/.cache/clangd/index/gemm_bilinear.hpp.092CAB87B366F08F.idx new file mode 100755 index 0000000000000000000000000000000000000000..a174214513f8006d1b0dfd24ed1dbf192628dde1 Binary files /dev/null and b/.cache/clangd/index/gemm_bilinear.hpp.092CAB87B366F08F.idx differ diff --git a/.cache/clangd/index/gemm_bilinear_xdl_fp16.cpp.372A779C3E21DD0E.idx b/.cache/clangd/index/gemm_bilinear_xdl_fp16.cpp.372A779C3E21DD0E.idx new file mode 100755 index 0000000000000000000000000000000000000000..4701b88e0503e52394ef0d01d0b86ad1f676806a Binary files /dev/null and b/.cache/clangd/index/gemm_bilinear_xdl_fp16.cpp.372A779C3E21DD0E.idx differ diff --git a/.cache/clangd/index/gemm_dpp_fp16.cpp.D49004015318A845.idx b/.cache/clangd/index/gemm_dpp_fp16.cpp.D49004015318A845.idx new file mode 100755 index 0000000000000000000000000000000000000000..e21b0700763e3d206e47f404c41261927c5508a1 Binary files /dev/null and b/.cache/clangd/index/gemm_dpp_fp16.cpp.D49004015318A845.idx differ diff --git a/.cache/clangd/index/gemm_f16_nn_instance.cpp.684811D08F35F1E0.idx b/.cache/clangd/index/gemm_f16_nn_instance.cpp.684811D08F35F1E0.idx new file mode 100755 index 0000000000000000000000000000000000000000..832c1389ec316d7356f749b5b45e5fe9b8d02fee Binary files /dev/null and b/.cache/clangd/index/gemm_f16_nn_instance.cpp.684811D08F35F1E0.idx differ diff --git a/.cache/clangd/index/gemm_f16_nn_instance.hpp.295DF91B6CFA2E94.idx b/.cache/clangd/index/gemm_f16_nn_instance.hpp.295DF91B6CFA2E94.idx new file mode 100755 index 0000000000000000000000000000000000000000..c989cbd72b1558f202af4cb908c2c4928887285f Binary files /dev/null and b/.cache/clangd/index/gemm_f16_nn_instance.hpp.295DF91B6CFA2E94.idx differ diff --git a/.cache/clangd/index/gemm_f16_nt_instance.cpp.CF459381653536CF.idx b/.cache/clangd/index/gemm_f16_nt_instance.cpp.CF459381653536CF.idx new file mode 100755 index 0000000000000000000000000000000000000000..ccf9f4f0e35eed9f8ff6f0fa2c0206b3043c179b Binary files /dev/null and b/.cache/clangd/index/gemm_f16_nt_instance.cpp.CF459381653536CF.idx differ diff --git a/.cache/clangd/index/gemm_f16_nt_instance.hpp.CF5C609546474B1F.idx b/.cache/clangd/index/gemm_f16_nt_instance.hpp.CF5C609546474B1F.idx new file mode 100755 index 0000000000000000000000000000000000000000..f07b2cbdc53e601f174f7cf58e00bba9489d092a Binary files /dev/null and b/.cache/clangd/index/gemm_f16_nt_instance.hpp.CF5C609546474B1F.idx differ diff --git a/.cache/clangd/index/gemm_f16_tn_instance.cpp.A27FD0EC622C1968.idx b/.cache/clangd/index/gemm_f16_tn_instance.cpp.A27FD0EC622C1968.idx new file mode 100755 index 0000000000000000000000000000000000000000..e5ccebc655debe338a8fe39c59552eeced432bfa Binary files /dev/null and b/.cache/clangd/index/gemm_f16_tn_instance.cpp.A27FD0EC622C1968.idx differ diff --git a/.cache/clangd/index/gemm_f16_tn_instance.hpp.C57CA23FF5A33CBF.idx b/.cache/clangd/index/gemm_f16_tn_instance.hpp.C57CA23FF5A33CBF.idx new file mode 100755 index 0000000000000000000000000000000000000000..45c477f2b04ecbfdb3289c8604174468eee03b15 Binary files /dev/null and b/.cache/clangd/index/gemm_f16_tn_instance.hpp.C57CA23FF5A33CBF.idx differ diff --git a/.cache/clangd/index/gemm_f16_tt_instance.cpp.5D254C2DB3D84C02.idx b/.cache/clangd/index/gemm_f16_tt_instance.cpp.5D254C2DB3D84C02.idx new file mode 100755 index 0000000000000000000000000000000000000000..95bc6d2479758d0409c30ad8d7fc442ca4e58415 Binary files /dev/null and b/.cache/clangd/index/gemm_f16_tt_instance.cpp.5D254C2DB3D84C02.idx differ diff --git a/.cache/clangd/index/gemm_f16_tt_instance.hpp.A9C0C844390863CE.idx b/.cache/clangd/index/gemm_f16_tt_instance.hpp.A9C0C844390863CE.idx new file mode 100755 index 0000000000000000000000000000000000000000..8eeedf5c0d31dfdf28d243c9997fea5c290f879a Binary files /dev/null and b/.cache/clangd/index/gemm_f16_tt_instance.hpp.A9C0C844390863CE.idx differ diff --git a/.cache/clangd/index/gemm_fastgelu.hpp.BF1B6FA711DF924E.idx b/.cache/clangd/index/gemm_fastgelu.hpp.BF1B6FA711DF924E.idx new file mode 100755 index 0000000000000000000000000000000000000000..14a85bb252551ee561c3c8a46f49f6f33d4f7d3a Binary files /dev/null and b/.cache/clangd/index/gemm_fastgelu.hpp.BF1B6FA711DF924E.idx differ diff --git a/.cache/clangd/index/gemm_fp16.cpp.3FD2C2DD621D7AD4.idx b/.cache/clangd/index/gemm_fp16.cpp.3FD2C2DD621D7AD4.idx new file mode 100755 index 0000000000000000000000000000000000000000..547689150871a103e6fb5b595919d61b5e1c0003 Binary files /dev/null and b/.cache/clangd/index/gemm_fp16.cpp.3FD2C2DD621D7AD4.idx differ diff --git a/.cache/clangd/index/gemm_fp32.cpp.6736B177964E2D4B.idx b/.cache/clangd/index/gemm_fp32.cpp.6736B177964E2D4B.idx new file mode 100755 index 0000000000000000000000000000000000000000..81a2995254a47cc3824a35548770e461b8194507 Binary files /dev/null and b/.cache/clangd/index/gemm_fp32.cpp.6736B177964E2D4B.idx differ diff --git a/.cache/clangd/index/gemm_int8.cpp.C4070EF85C2974E6.idx b/.cache/clangd/index/gemm_int8.cpp.C4070EF85C2974E6.idx new file mode 100755 index 0000000000000000000000000000000000000000..627c6607c415779c645edd47007477e2afe5d38a Binary files /dev/null and b/.cache/clangd/index/gemm_int8.cpp.C4070EF85C2974E6.idx differ diff --git a/.cache/clangd/index/gemm_layernorm_xdl_naive_fp16.cpp.B168DA32D72A32C6.idx b/.cache/clangd/index/gemm_layernorm_xdl_naive_fp16.cpp.B168DA32D72A32C6.idx new file mode 100755 index 0000000000000000000000000000000000000000..6262963b68762adc2a85e851fdcd667ab6ac4d42 Binary files /dev/null and b/.cache/clangd/index/gemm_layernorm_xdl_naive_fp16.cpp.B168DA32D72A32C6.idx differ diff --git a/.cache/clangd/index/gemm_max_xdl_bf16.cpp.40A7D230DC3CAD48.idx b/.cache/clangd/index/gemm_max_xdl_bf16.cpp.40A7D230DC3CAD48.idx new file mode 100755 index 0000000000000000000000000000000000000000..fc3b58db49e6420c3ef8bf2774576470b500bda9 Binary files /dev/null and b/.cache/clangd/index/gemm_max_xdl_bf16.cpp.40A7D230DC3CAD48.idx differ diff --git a/.cache/clangd/index/gemm_max_xdl_fp16.cpp.F50CF88BE688DE26.idx b/.cache/clangd/index/gemm_max_xdl_fp16.cpp.F50CF88BE688DE26.idx new file mode 100755 index 0000000000000000000000000000000000000000..8f3e6b1f0e1b136fc9c4c589e43fa3995c411a41 Binary files /dev/null and b/.cache/clangd/index/gemm_max_xdl_fp16.cpp.F50CF88BE688DE26.idx differ diff --git a/.cache/clangd/index/gemm_max_xdl_fp32.cpp.1AFB0F6127AC986E.idx b/.cache/clangd/index/gemm_max_xdl_fp32.cpp.1AFB0F6127AC986E.idx new file mode 100755 index 0000000000000000000000000000000000000000..205d2749f04183ff35c2ddfb292046c061aa717e Binary files /dev/null and b/.cache/clangd/index/gemm_max_xdl_fp32.cpp.1AFB0F6127AC986E.idx differ diff --git a/.cache/clangd/index/gemm_max_xdl_int8.cpp.A29BA4DC035DE52E.idx b/.cache/clangd/index/gemm_max_xdl_int8.cpp.A29BA4DC035DE52E.idx new file mode 100755 index 0000000000000000000000000000000000000000..188ac33dd649abeb2ceafe42b6e0647518698dd3 Binary files /dev/null and b/.cache/clangd/index/gemm_max_xdl_int8.cpp.A29BA4DC035DE52E.idx differ diff --git a/.cache/clangd/index/gemm_mean_meansquare_xdl_bf16.cpp.E1D95735FEB3AF62.idx b/.cache/clangd/index/gemm_mean_meansquare_xdl_bf16.cpp.E1D95735FEB3AF62.idx new file mode 100755 index 0000000000000000000000000000000000000000..9bcaab94261304a74fe91766c692e908538232fb Binary files /dev/null and b/.cache/clangd/index/gemm_mean_meansquare_xdl_bf16.cpp.E1D95735FEB3AF62.idx differ diff --git a/.cache/clangd/index/gemm_mean_meansquare_xdl_fp16.cpp.80F76169F90BD8DC.idx b/.cache/clangd/index/gemm_mean_meansquare_xdl_fp16.cpp.80F76169F90BD8DC.idx new file mode 100755 index 0000000000000000000000000000000000000000..6212ae8b7bee5792c7f68ba4e146020c7087f916 Binary files /dev/null and b/.cache/clangd/index/gemm_mean_meansquare_xdl_fp16.cpp.80F76169F90BD8DC.idx differ diff --git a/.cache/clangd/index/gemm_mean_meansquare_xdl_fp32.cpp.DAB08B69CF521C93.idx b/.cache/clangd/index/gemm_mean_meansquare_xdl_fp32.cpp.DAB08B69CF521C93.idx new file mode 100755 index 0000000000000000000000000000000000000000..d61fd4efb2d576d12d596b5f9957fdb8ead206d8 Binary files /dev/null and b/.cache/clangd/index/gemm_mean_meansquare_xdl_fp32.cpp.DAB08B69CF521C93.idx differ diff --git a/.cache/clangd/index/gemm_multi_ABD_xdl_fp16.cpp.5859AE2CEFBAACD5.idx b/.cache/clangd/index/gemm_multi_ABD_xdl_fp16.cpp.5859AE2CEFBAACD5.idx new file mode 100755 index 0000000000000000000000000000000000000000..51b25e7bcabcac889e707d47069698ab81cb3bfb Binary files /dev/null and b/.cache/clangd/index/gemm_multi_ABD_xdl_fp16.cpp.5859AE2CEFBAACD5.idx differ diff --git a/.cache/clangd/index/gemm_multiply_add.hpp.6B6D21EE52049C9D.idx b/.cache/clangd/index/gemm_multiply_add.hpp.6B6D21EE52049C9D.idx new file mode 100755 index 0000000000000000000000000000000000000000..16c7745228f2273c99ae57311e6a9fa74e2003b6 Binary files /dev/null and b/.cache/clangd/index/gemm_multiply_add.hpp.6B6D21EE52049C9D.idx differ diff --git a/.cache/clangd/index/gemm_quantization_common.hpp.A23B3AA4CB2F3E7A.idx b/.cache/clangd/index/gemm_quantization_common.hpp.A23B3AA4CB2F3E7A.idx new file mode 100755 index 0000000000000000000000000000000000000000..31cd900bb6f3cee6368646c73995735574e5c0dd Binary files /dev/null and b/.cache/clangd/index/gemm_quantization_common.hpp.A23B3AA4CB2F3E7A.idx differ diff --git a/.cache/clangd/index/gemm_reduce_fp16.cpp.EFAED30A7BA2F6AF.idx b/.cache/clangd/index/gemm_reduce_fp16.cpp.EFAED30A7BA2F6AF.idx new file mode 100755 index 0000000000000000000000000000000000000000..fa81f84619ce371316cd8147b61c42670505a3cb Binary files /dev/null and b/.cache/clangd/index/gemm_reduce_fp16.cpp.EFAED30A7BA2F6AF.idx differ diff --git a/.cache/clangd/index/gemm_reduce_xdl_common.hpp.BF32975909DCA917.idx b/.cache/clangd/index/gemm_reduce_xdl_common.hpp.BF32975909DCA917.idx new file mode 100755 index 0000000000000000000000000000000000000000..f5bb3522fdab70008c626df6327579e432f7f2ae Binary files /dev/null and b/.cache/clangd/index/gemm_reduce_xdl_common.hpp.BF32975909DCA917.idx differ diff --git a/.cache/clangd/index/gemm_specialization.hpp.F3D9282B71057FD0.idx b/.cache/clangd/index/gemm_specialization.hpp.F3D9282B71057FD0.idx new file mode 100755 index 0000000000000000000000000000000000000000..4c4edce95d221c43179ebf5a3900882eda9b448e Binary files /dev/null and b/.cache/clangd/index/gemm_specialization.hpp.F3D9282B71057FD0.idx differ diff --git a/.cache/clangd/index/gemm_splitk.hpp.749B3E67AC6B8B9C.idx b/.cache/clangd/index/gemm_splitk.hpp.749B3E67AC6B8B9C.idx new file mode 100755 index 0000000000000000000000000000000000000000..ff7861103617ea8a723e187ecc1a34d9f71f5b64 Binary files /dev/null and b/.cache/clangd/index/gemm_splitk.hpp.749B3E67AC6B8B9C.idx differ diff --git a/.cache/clangd/index/gemm_standalone_xdl_fp16.cpp.EE84FA2A4697A71B.idx b/.cache/clangd/index/gemm_standalone_xdl_fp16.cpp.EE84FA2A4697A71B.idx new file mode 100755 index 0000000000000000000000000000000000000000..e712de5d3f495b3d1cf1258072d32e90e0501229 Binary files /dev/null and b/.cache/clangd/index/gemm_standalone_xdl_fp16.cpp.EE84FA2A4697A71B.idx differ diff --git a/.cache/clangd/index/gemm_streamk.hpp.B73183768811D4AE.idx b/.cache/clangd/index/gemm_streamk.hpp.B73183768811D4AE.idx new file mode 100755 index 0000000000000000000000000000000000000000..5d2164748ace7907d19a614ffb01b464d2802862 Binary files /dev/null and b/.cache/clangd/index/gemm_streamk.hpp.B73183768811D4AE.idx differ diff --git a/.cache/clangd/index/gemm_util.hpp.1BFB1C62397B2C8C.idx b/.cache/clangd/index/gemm_util.hpp.1BFB1C62397B2C8C.idx new file mode 100755 index 0000000000000000000000000000000000000000..7910701d358b151d2116f4e89427f6f215331b8c Binary files /dev/null and b/.cache/clangd/index/gemm_util.hpp.1BFB1C62397B2C8C.idx differ diff --git a/.cache/clangd/index/gemm_wavelet_f16_tn_instance.cpp.DC996A5B769C800C.idx b/.cache/clangd/index/gemm_wavelet_f16_tn_instance.cpp.DC996A5B769C800C.idx new file mode 100755 index 0000000000000000000000000000000000000000..0eba24c173ece16ccced696e5e9894858f520f3a Binary files /dev/null and b/.cache/clangd/index/gemm_wavelet_f16_tn_instance.cpp.DC996A5B769C800C.idx differ diff --git a/.cache/clangd/index/gemm_wavelet_f16_tn_instance.hpp.6DF980223599227D.idx b/.cache/clangd/index/gemm_wavelet_f16_tn_instance.hpp.6DF980223599227D.idx new file mode 100755 index 0000000000000000000000000000000000000000..996f827df9971058e26522febb77cc8e21280f29 Binary files /dev/null and b/.cache/clangd/index/gemm_wavelet_f16_tn_instance.hpp.6DF980223599227D.idx differ diff --git a/.cache/clangd/index/gemm_xdl_bf16.cpp.3DCFB3ED2888403A.idx b/.cache/clangd/index/gemm_xdl_bf16.cpp.3DCFB3ED2888403A.idx new file mode 100755 index 0000000000000000000000000000000000000000..cd629716cabe9091e17ee1372078e4893c315f7c Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_bf16.cpp.3DCFB3ED2888403A.idx differ diff --git a/.cache/clangd/index/gemm_xdl_bf16_rtn.cpp.52661E2CDB7295F6.idx b/.cache/clangd/index/gemm_xdl_bf16_rtn.cpp.52661E2CDB7295F6.idx new file mode 100755 index 0000000000000000000000000000000000000000..276a4284f6adc85d7625cc999a4aadff265e0349 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_bf16_rtn.cpp.52661E2CDB7295F6.idx differ diff --git a/.cache/clangd/index/gemm_xdl_bias_relu_quantization_int8.cpp.FA91569EB26ACB71.idx b/.cache/clangd/index/gemm_xdl_bias_relu_quantization_int8.cpp.FA91569EB26ACB71.idx new file mode 100755 index 0000000000000000000000000000000000000000..c44995d421facc98c177f7ef4f2239e0d01cf613 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_bias_relu_quantization_int8.cpp.FA91569EB26ACB71.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp16.cpp.FC8CAA78DACFED36.idx b/.cache/clangd/index/gemm_xdl_fp16.cpp.FC8CAA78DACFED36.idx new file mode 100755 index 0000000000000000000000000000000000000000..45bde02093b0f0f4d1f619f726e0fb6313fcf72d Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp16.cpp.FC8CAA78DACFED36.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp16_fp8.cpp.44C0E7F4DAEEA470.idx b/.cache/clangd/index/gemm_xdl_fp16_fp8.cpp.44C0E7F4DAEEA470.idx new file mode 100755 index 0000000000000000000000000000000000000000..f911a3a75d9f584a370f682a0981a136d025e8c0 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp16_fp8.cpp.44C0E7F4DAEEA470.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp16_v2.cpp.BBCC41C35D9D878E.idx b/.cache/clangd/index/gemm_xdl_fp16_v2.cpp.BBCC41C35D9D878E.idx new file mode 100755 index 0000000000000000000000000000000000000000..7dbed18bddf92647e01b7060135daf0155ce9ee4 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp16_v2.cpp.BBCC41C35D9D878E.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp64.cpp.326BD4F598F31DB0.idx b/.cache/clangd/index/gemm_xdl_fp64.cpp.326BD4F598F31DB0.idx new file mode 100755 index 0000000000000000000000000000000000000000..41318b8d75351e3168da0504848fa1a48201c945 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp64.cpp.326BD4F598F31DB0.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp8.cpp.67C1EEC83062DA6B.idx b/.cache/clangd/index/gemm_xdl_fp8.cpp.67C1EEC83062DA6B.idx new file mode 100755 index 0000000000000000000000000000000000000000..005908ca90f232d6de4a5541b3b03d1b3ffd0be7 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp8.cpp.67C1EEC83062DA6B.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp8_bf8.cpp.AEAA96B6834BEA56.idx b/.cache/clangd/index/gemm_xdl_fp8_bf8.cpp.AEAA96B6834BEA56.idx new file mode 100755 index 0000000000000000000000000000000000000000..c625ea67d2b22a08e0f66cd7130ceb63ff185031 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp8_bf8.cpp.AEAA96B6834BEA56.idx differ diff --git a/.cache/clangd/index/gemm_xdl_int8.cpp.636EBC0B4897BF3A.idx b/.cache/clangd/index/gemm_xdl_int8.cpp.636EBC0B4897BF3A.idx new file mode 100755 index 0000000000000000000000000000000000000000..98767742f762a93909a1a9941f3278fc4b466f2f Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_int8.cpp.636EBC0B4897BF3A.idx differ diff --git a/.cache/clangd/index/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp.4306E603B691D828.idx b/.cache/clangd/index/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp.4306E603B691D828.idx new file mode 100755 index 0000000000000000000000000000000000000000..5ce5f001a6a069f02ee5f210ca012b58b0416663 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp.4306E603B691D828.idx differ diff --git a/.cache/clangd/index/gemm_xdl_lds_direct_load_fp16.cpp.7BFA821D80385E72.idx b/.cache/clangd/index/gemm_xdl_lds_direct_load_fp16.cpp.7BFA821D80385E72.idx new file mode 100755 index 0000000000000000000000000000000000000000..0f980b1e260c12f22e77ed2a73b16d3d2b620951 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_lds_direct_load_fp16.cpp.7BFA821D80385E72.idx differ diff --git a/.cache/clangd/index/gemm_xdl_lds_direct_load_fp32.cpp.39A0D92907A2D21F.idx b/.cache/clangd/index/gemm_xdl_lds_direct_load_fp32.cpp.39A0D92907A2D21F.idx new file mode 100755 index 0000000000000000000000000000000000000000..e5af5fec80e800a22f53bf3142e1491206b61711 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_lds_direct_load_fp32.cpp.39A0D92907A2D21F.idx differ diff --git a/.cache/clangd/index/gemm_xdl_quantization_int8.cpp.A58C21992E5540BA.idx b/.cache/clangd/index/gemm_xdl_quantization_int8.cpp.A58C21992E5540BA.idx new file mode 100755 index 0000000000000000000000000000000000000000..d8bb10aaf4264ef05ffda9f9eefce970b9cc951b Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_quantization_int8.cpp.A58C21992E5540BA.idx differ diff --git a/.cache/clangd/index/gemm_xdl_skip_b_lds_fp16.cpp.86DD71A6DF5D193B.idx b/.cache/clangd/index/gemm_xdl_skip_b_lds_fp16.cpp.86DD71A6DF5D193B.idx new file mode 100755 index 0000000000000000000000000000000000000000..c135648fa47a46f0065e2b865aa0e05fc55795f5 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_skip_b_lds_fp16.cpp.86DD71A6DF5D193B.idx differ diff --git a/.cache/clangd/index/gemm_xdl_streamk.cpp.2BC4B9D993728CB3.idx b/.cache/clangd/index/gemm_xdl_streamk.cpp.2BC4B9D993728CB3.idx new file mode 100755 index 0000000000000000000000000000000000000000..130ac4aba57c1f358f612f7402e540cb7fceafc2 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_streamk.cpp.2BC4B9D993728CB3.idx differ diff --git a/.cache/clangd/index/gemm_xdl_wavelet_fp16.cpp.5100C57848816EFC.idx b/.cache/clangd/index/gemm_xdl_wavelet_fp16.cpp.5100C57848816EFC.idx new file mode 100755 index 0000000000000000000000000000000000000000..a44a41fb78543365bdb7acba2d5722ff8b8aba7f Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_wavelet_fp16.cpp.5100C57848816EFC.idx differ diff --git a/.cache/clangd/index/generic_memory_space_atomic.hpp.C9D4AE0A6DC5899F.idx b/.cache/clangd/index/generic_memory_space_atomic.hpp.C9D4AE0A6DC5899F.idx new file mode 100755 index 0000000000000000000000000000000000000000..ce5a6d47eb509257754f8085191b4e46f4ab2f23 Binary files /dev/null and b/.cache/clangd/index/generic_memory_space_atomic.hpp.C9D4AE0A6DC5899F.idx differ diff --git a/.cache/clangd/index/get_id.hpp.D8C96396D4A9E3B4.idx b/.cache/clangd/index/get_id.hpp.D8C96396D4A9E3B4.idx new file mode 100755 index 0000000000000000000000000000000000000000..710a2a623dd454a6d1d041c3ed55ee9bad3cc47b Binary files /dev/null and b/.cache/clangd/index/get_id.hpp.D8C96396D4A9E3B4.idx differ diff --git a/.cache/clangd/index/get_shift.hpp.0A10E058B6B6AEDA.idx b/.cache/clangd/index/get_shift.hpp.0A10E058B6B6AEDA.idx new file mode 100755 index 0000000000000000000000000000000000000000..8a962c7d1bc007d3a1973d9ee83840e6ce0eed91 Binary files /dev/null and b/.cache/clangd/index/get_shift.hpp.0A10E058B6B6AEDA.idx differ diff --git a/.cache/clangd/index/gridwise_2d_multiple_reduction_multiblock.hpp.B962E3920F925D70.idx b/.cache/clangd/index/gridwise_2d_multiple_reduction_multiblock.hpp.B962E3920F925D70.idx new file mode 100755 index 0000000000000000000000000000000000000000..f3fe57c665b2b465322e3c04d9eb7cac87929eb6 Binary files /dev/null and b/.cache/clangd/index/gridwise_2d_multiple_reduction_multiblock.hpp.B962E3920F925D70.idx differ diff --git a/.cache/clangd/index/gridwise_2d_multiple_reduction_threadwise.hpp.39A048A5A487AC02.idx b/.cache/clangd/index/gridwise_2d_multiple_reduction_threadwise.hpp.39A048A5A487AC02.idx new file mode 100755 index 0000000000000000000000000000000000000000..efe8f75f84d24b0a81f780a07a82e6474e9482f7 Binary files /dev/null and b/.cache/clangd/index/gridwise_2d_multiple_reduction_threadwise.hpp.39A048A5A487AC02.idx differ diff --git a/.cache/clangd/index/gridwise_2d_reduction_multiblock.hpp.6BD3600B2B4E50BE.idx b/.cache/clangd/index/gridwise_2d_reduction_multiblock.hpp.6BD3600B2B4E50BE.idx new file mode 100755 index 0000000000000000000000000000000000000000..3f580fe9d69cef278bbafc070f0f957ea1157c43 Binary files /dev/null and b/.cache/clangd/index/gridwise_2d_reduction_multiblock.hpp.6BD3600B2B4E50BE.idx differ diff --git a/.cache/clangd/index/gridwise_2d_reduction_threadwise.hpp.324930ADA73F028A.idx b/.cache/clangd/index/gridwise_2d_reduction_threadwise.hpp.324930ADA73F028A.idx new file mode 100755 index 0000000000000000000000000000000000000000..197056666708c5a14f9f1e9cd82732be7ec35d17 Binary files /dev/null and b/.cache/clangd/index/gridwise_2d_reduction_threadwise.hpp.324930ADA73F028A.idx differ diff --git a/.cache/clangd/index/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp.E109D2D6AEE43DDD.idx b/.cache/clangd/index/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp.E109D2D6AEE43DDD.idx new file mode 100755 index 0000000000000000000000000000000000000000..025c0c8040e36d1fd5f6eb95d3cd6e9d479ae66d Binary files /dev/null and b/.cache/clangd/index/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp.E109D2D6AEE43DDD.idx differ diff --git a/.cache/clangd/index/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp.FE93431287EC85FD.idx b/.cache/clangd/index/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp.FE93431287EC85FD.idx new file mode 100755 index 0000000000000000000000000000000000000000..88cb12e6354a254b3263594861727d3f5dc54c7f Binary files /dev/null and b/.cache/clangd/index/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp.FE93431287EC85FD.idx differ diff --git a/.cache/clangd/index/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp.A946F68C0A5B7C29.idx b/.cache/clangd/index/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp.A946F68C0A5B7C29.idx new file mode 100755 index 0000000000000000000000000000000000000000..e69cb3de047db51be699ed89986eab098bdcb2d2 Binary files /dev/null and b/.cache/clangd/index/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp.A946F68C0A5B7C29.idx differ diff --git a/.cache/clangd/index/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp.EEF70E721362F483.idx b/.cache/clangd/index/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp.EEF70E721362F483.idx new file mode 100755 index 0000000000000000000000000000000000000000..f1f22ea873922e7b4c327dc16f52548e1edc0f14 Binary files /dev/null and b/.cache/clangd/index/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp.EEF70E721362F483.idx differ diff --git a/.cache/clangd/index/gridwise_batchnorm_backward_blockwise_welford.hpp.8F24BDC37125B404.idx b/.cache/clangd/index/gridwise_batchnorm_backward_blockwise_welford.hpp.8F24BDC37125B404.idx new file mode 100755 index 0000000000000000000000000000000000000000..c5253527eafd9d6e5627777fcea85801df49a7b8 Binary files /dev/null and b/.cache/clangd/index/gridwise_batchnorm_backward_blockwise_welford.hpp.8F24BDC37125B404.idx differ diff --git a/.cache/clangd/index/gridwise_batchnorm_forward_blockwise_welford.hpp.640DA951EF640017.idx b/.cache/clangd/index/gridwise_batchnorm_forward_blockwise_welford.hpp.640DA951EF640017.idx new file mode 100755 index 0000000000000000000000000000000000000000..7241dba09c1d81abe379d92e3dd11fc2e160878c Binary files /dev/null and b/.cache/clangd/index/gridwise_batchnorm_forward_blockwise_welford.hpp.640DA951EF640017.idx differ diff --git a/.cache/clangd/index/gridwise_elementwise_1d.hpp.AC0144CD925F6905.idx b/.cache/clangd/index/gridwise_elementwise_1d.hpp.AC0144CD925F6905.idx new file mode 100755 index 0000000000000000000000000000000000000000..cd6bb67c7134e3b49048add880b8082decc20359 Binary files /dev/null and b/.cache/clangd/index/gridwise_elementwise_1d.hpp.AC0144CD925F6905.idx differ diff --git a/.cache/clangd/index/gridwise_elementwise_1d_scale.hpp.CF7CE1061BA1DF52.idx b/.cache/clangd/index/gridwise_elementwise_1d_scale.hpp.CF7CE1061BA1DF52.idx new file mode 100755 index 0000000000000000000000000000000000000000..90272db2e00aa239f7ba6bc6449813950907ee4f Binary files /dev/null and b/.cache/clangd/index/gridwise_elementwise_1d_scale.hpp.CF7CE1061BA1DF52.idx differ diff --git a/.cache/clangd/index/gridwise_elementwise_2d.hpp.DCACD2865A1A82CC.idx b/.cache/clangd/index/gridwise_elementwise_2d.hpp.DCACD2865A1A82CC.idx new file mode 100755 index 0000000000000000000000000000000000000000..ea4a848e2d603ca35715cb1393355b79af75e193 Binary files /dev/null and b/.cache/clangd/index/gridwise_elementwise_2d.hpp.DCACD2865A1A82CC.idx differ diff --git a/.cache/clangd/index/gridwise_elementwise_3d.hpp.4B57404BC7344584.idx b/.cache/clangd/index/gridwise_elementwise_3d.hpp.4B57404BC7344584.idx new file mode 100755 index 0000000000000000000000000000000000000000..bc954cd8f5dbd8f28c3ad03b95b40f523be08df4 Binary files /dev/null and b/.cache/clangd/index/gridwise_elementwise_3d.hpp.4B57404BC7344584.idx differ diff --git a/.cache/clangd/index/gridwise_elementwise_layernorm_welford_variance.hpp.61A857F748242277.idx b/.cache/clangd/index/gridwise_elementwise_layernorm_welford_variance.hpp.61A857F748242277.idx new file mode 100755 index 0000000000000000000000000000000000000000..9f9c5a8132539edce44bf30bda39149c7299162e Binary files /dev/null and b/.cache/clangd/index/gridwise_elementwise_layernorm_welford_variance.hpp.61A857F748242277.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp.6AEBEC03A02FE520.idx b/.cache/clangd/index/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp.6AEBEC03A02FE520.idx new file mode 100755 index 0000000000000000000000000000000000000000..bed1f5dbedef093f563acc02a67fde52d3b8cc03 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp.6AEBEC03A02FE520.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_dpp.hpp.E915886DCA1D5AA3.idx b/.cache/clangd/index/gridwise_gemm_dpp.hpp.E915886DCA1D5AA3.idx new file mode 100755 index 0000000000000000000000000000000000000000..b70aab05684de1e53e164555eeb40f83c875670d Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_dpp.hpp.E915886DCA1D5AA3.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp.8ED0C5830B90D078.idx b/.cache/clangd/index/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp.8ED0C5830B90D078.idx new file mode 100755 index 0000000000000000000000000000000000000000..dd3c3c47224fe48fbd9c633cd4c0f8dded9fb31f Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp.8ED0C5830B90D078.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.E96B152CEA3EEC31.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.E96B152CEA3EEC31.idx new file mode 100755 index 0000000000000000000000000000000000000000..e349c8662e4e3c8b4f9a5ef1a37fdf9d4fd498a3 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.E96B152CEA3EEC31.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp.75601854417950D1.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp.75601854417950D1.idx new file mode 100755 index 0000000000000000000000000000000000000000..363b7b0660b33425828e3e7724b27792c9b5552d Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp.75601854417950D1.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_wmma_cshuffle.hpp.7EFDFD3F43E2C561.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_wmma_cshuffle.hpp.7EFDFD3F43E2C561.idx new file mode 100755 index 0000000000000000000000000000000000000000..4a42614665462651fabe0500b629ae7b33998fba Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_wmma_cshuffle.hpp.7EFDFD3F43E2C561.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle.hpp.7B2A45106BD10E35.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle.hpp.7B2A45106BD10E35.idx new file mode 100755 index 0000000000000000000000000000000000000000..66230c649cc7a789d0cf72214500d9a9301d087b Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle.hpp.7B2A45106BD10E35.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.8A2E3D6921803C3C.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.8A2E3D6921803C3C.idx new file mode 100755 index 0000000000000000000000000000000000000000..f7d353415ad2a2412d871ac605af58db6c87d073 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.8A2E3D6921803C3C.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp.79ABECDBF9253649.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp.79ABECDBF9253649.idx new file mode 100755 index 0000000000000000000000000000000000000000..48c200a13e87eeb4f81c5b9a99fef3fc89ca0308 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp.79ABECDBF9253649.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_pipeline_selector.hpp.4F4E408D810A020A.idx b/.cache/clangd/index/gridwise_gemm_pipeline_selector.hpp.4F4E408D810A020A.idx new file mode 100755 index 0000000000000000000000000000000000000000..76837aa0677535f58f27f980d70e06662701e92f Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_pipeline_selector.hpp.4F4E408D810A020A.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_pipeline_v1.hpp.8B594F4BBFC9415D.idx b/.cache/clangd/index/gridwise_gemm_pipeline_v1.hpp.8B594F4BBFC9415D.idx new file mode 100755 index 0000000000000000000000000000000000000000..a2b420caa1f6cffe696a1f3868b021b46a8cf277 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_pipeline_v1.hpp.8B594F4BBFC9415D.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_pipeline_v2.hpp.1C7FE339CB47DFD5.idx b/.cache/clangd/index/gridwise_gemm_pipeline_v2.hpp.1C7FE339CB47DFD5.idx new file mode 100755 index 0000000000000000000000000000000000000000..ff45438c04e43d918221a85635e4f803bccab9f8 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_pipeline_v2.hpp.1C7FE339CB47DFD5.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_pipeline_v3.hpp.D2EF7B80D9AE44CD.idx b/.cache/clangd/index/gridwise_gemm_pipeline_v3.hpp.D2EF7B80D9AE44CD.idx new file mode 100755 index 0000000000000000000000000000000000000000..43e9e4afa3b3c08fea439baa37a984285f2849e3 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_pipeline_v3.hpp.D2EF7B80D9AE44CD.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_pipeline_v4_direct_load.hpp.7C3BB633588D635D.idx b/.cache/clangd/index/gridwise_gemm_pipeline_v4_direct_load.hpp.7C3BB633588D635D.idx new file mode 100755 index 0000000000000000000000000000000000000000..5e4070436c52f91ac8e0418dd37e6f3723800194 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_pipeline_v4_direct_load.hpp.7C3BB633588D635D.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp.76D93CEF02B94AEF.idx b/.cache/clangd/index/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp.76D93CEF02B94AEF.idx new file mode 100755 index 0000000000000000000000000000000000000000..a9d1d6782a6a12de725d0654727f134ce8870bfc Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp.76D93CEF02B94AEF.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp.F83027A680F1BF3E.idx b/.cache/clangd/index/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp.F83027A680F1BF3E.idx new file mode 100755 index 0000000000000000000000000000000000000000..586b61c139e24ad183851fdef6d71c30ab3e2450 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp.F83027A680F1BF3E.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_waveletmodel.hpp.C9F027882C9D4F2B.idx b/.cache/clangd/index/gridwise_gemm_waveletmodel.hpp.C9F027882C9D4F2B.idx new file mode 100755 index 0000000000000000000000000000000000000000..a7c0eeae3a78a2c09c8ff02829658dc371c90618 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_waveletmodel.hpp.C9F027882C9D4F2B.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_wmma.hpp.4F8B2CDD1348D704.idx b/.cache/clangd/index/gridwise_gemm_wmma.hpp.4F8B2CDD1348D704.idx new file mode 100755 index 0000000000000000000000000000000000000000..32516562155502353b796bc8e12b486e162fcc61 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_wmma.hpp.4F8B2CDD1348D704.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v1.hpp.E796A6FD7BCB2B56.idx b/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v1.hpp.E796A6FD7BCB2B56.idx new file mode 100755 index 0000000000000000000000000000000000000000..a8f867a42e40c9bd75f9d72dfdba3be025910415 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v1.hpp.E796A6FD7BCB2B56.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v2.hpp.E0B3C0FB7BAA0FB3.idx b/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v2.hpp.E0B3C0FB7BAA0FB3.idx new file mode 100755 index 0000000000000000000000000000000000000000..beae9ba703c1b999b88b35097ac05c8a26bd4153 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v2.hpp.E0B3C0FB7BAA0FB3.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp.FC9F72A612CD78AC.idx b/.cache/clangd/index/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp.FC9F72A612CD78AC.idx new file mode 100755 index 0000000000000000000000000000000000000000..c94fd94705a1266f30c8778ab89bd347e15cf837 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp.FC9F72A612CD78AC.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp.647408B24FB9D296.idx b/.cache/clangd/index/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp.647408B24FB9D296.idx new file mode 100755 index 0000000000000000000000000000000000000000..5ca594b41dd1e8d72f3d65416c021f020e5feabf Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp.647408B24FB9D296.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_bwd_weight.hpp.65AB95ECB27BA1C2.idx b/.cache/clangd/index/gridwise_gemm_xdlops_bwd_weight.hpp.65AB95ECB27BA1C2.idx new file mode 100755 index 0000000000000000000000000000000000000000..04d190babf1755748594e01ebd94225205992fec Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_bwd_weight.hpp.65AB95ECB27BA1C2.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_skip_b_lds_v1.hpp.8CF2D2B713EFE65F.idx b/.cache/clangd/index/gridwise_gemm_xdlops_skip_b_lds_v1.hpp.8CF2D2B713EFE65F.idx new file mode 100755 index 0000000000000000000000000000000000000000..ce7e75651f0290b8b7cc31a4972ba5a7938ffdaa Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_skip_b_lds_v1.hpp.8CF2D2B713EFE65F.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp.76232B181B348ACC.idx b/.cache/clangd/index/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp.76232B181B348ACC.idx new file mode 100755 index 0000000000000000000000000000000000000000..da1c489a1445410b45618f47b50d55b64e906509 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp.76232B181B348ACC.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_streamk.hpp.3A8F1B585329CAFC.idx b/.cache/clangd/index/gridwise_gemm_xdlops_streamk.hpp.3A8F1B585329CAFC.idx new file mode 100755 index 0000000000000000000000000000000000000000..5c9ba8bb9a0959aed780fa361c6e4d168c7efc84 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_streamk.hpp.3A8F1B585329CAFC.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_v2r3.hpp.66B1A28D309FB5D4.idx b/.cache/clangd/index/gridwise_gemm_xdlops_v2r3.hpp.66B1A28D309FB5D4.idx new file mode 100755 index 0000000000000000000000000000000000000000..000b5cded4987bad2013c57491803a89a543bc4a Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_v2r3.hpp.66B1A28D309FB5D4.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_v2r4r2.hpp.FFFD55E6D49B5037.idx b/.cache/clangd/index/gridwise_gemm_xdlops_v2r4r2.hpp.FFFD55E6D49B5037.idx new file mode 100755 index 0000000000000000000000000000000000000000..9c73ef86a0a06cab4f0dcee22d1339b6ef00ebe2 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_v2r4r2.hpp.FFFD55E6D49B5037.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_v3r1.hpp.F4E339DE2B052B4F.idx b/.cache/clangd/index/gridwise_gemm_xdlops_v3r1.hpp.F4E339DE2B052B4F.idx new file mode 100755 index 0000000000000000000000000000000000000000..d4c706130d039d40a000375415139a7a0f5ee630 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_v3r1.hpp.F4E339DE2B052B4F.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_v3r2.hpp.A91ECFDCE099EED9.idx b/.cache/clangd/index/gridwise_gemm_xdlops_v3r2.hpp.A91ECFDCE099EED9.idx new file mode 100755 index 0000000000000000000000000000000000000000..68e81bed95e0433fd1fb4fcb5128e6a64c8ec45b Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_v3r2.hpp.A91ECFDCE099EED9.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_v3r3.hpp.E0F0A6CCA772EDE7.idx b/.cache/clangd/index/gridwise_gemm_xdlops_v3r3.hpp.E0F0A6CCA772EDE7.idx new file mode 100755 index 0000000000000000000000000000000000000000..acd23df4807572896afb04d823abdbe7257abadd Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_v3r3.hpp.E0F0A6CCA772EDE7.idx differ diff --git a/.cache/clangd/index/gridwise_multiblock_batchnorm_forward.hpp.4F73C2A5286FBB35.idx b/.cache/clangd/index/gridwise_multiblock_batchnorm_forward.hpp.4F73C2A5286FBB35.idx new file mode 100755 index 0000000000000000000000000000000000000000..d8f7cde403e5a263f0b08dc085de8134ebdc1cea Binary files /dev/null and b/.cache/clangd/index/gridwise_multiblock_batchnorm_forward.hpp.4F73C2A5286FBB35.idx differ diff --git a/.cache/clangd/index/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp.C7E55A256926B340.idx b/.cache/clangd/index/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp.C7E55A256926B340.idx new file mode 100755 index 0000000000000000000000000000000000000000..2dde4d948d3e65d0c5cd0953b7e4a366a61400ee Binary files /dev/null and b/.cache/clangd/index/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp.C7E55A256926B340.idx differ diff --git a/.cache/clangd/index/gridwise_multiblock_welford_first_half.hpp.25F962D583FA62A6.idx b/.cache/clangd/index/gridwise_multiblock_welford_first_half.hpp.25F962D583FA62A6.idx new file mode 100755 index 0000000000000000000000000000000000000000..24f967f82813d3864b05b87c418a93d913c10392 Binary files /dev/null and b/.cache/clangd/index/gridwise_multiblock_welford_first_half.hpp.25F962D583FA62A6.idx differ diff --git a/.cache/clangd/index/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp.BEA9938289B2ECCF.idx b/.cache/clangd/index/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp.BEA9938289B2ECCF.idx new file mode 100755 index 0000000000000000000000000000000000000000..7ffd94845e2cedca2f2294fd5bdd7301dd534e1f Binary files /dev/null and b/.cache/clangd/index/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp.BEA9938289B2ECCF.idx differ diff --git a/.cache/clangd/index/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp.03EA80728C2C5A38.idx b/.cache/clangd/index/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp.03EA80728C2C5A38.idx new file mode 100755 index 0000000000000000000000000000000000000000..63250ba5410719e066ac86e894d9a59ea3f4fc9a Binary files /dev/null and b/.cache/clangd/index/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp.03EA80728C2C5A38.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_bwd_data.hpp.43B5DE0C3D25BB20.idx b/.cache/clangd/index/gridwise_normalization_bwd_data.hpp.43B5DE0C3D25BB20.idx new file mode 100755 index 0000000000000000000000000000000000000000..4e368f0358218880dd43c25332e447da7552493f Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_bwd_data.hpp.43B5DE0C3D25BB20.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_bwd_gamma_beta.hpp.8028572B7299488D.idx b/.cache/clangd/index/gridwise_normalization_bwd_gamma_beta.hpp.8028572B7299488D.idx new file mode 100755 index 0000000000000000000000000000000000000000..c1995295652892588439078b1bbdcaafbe38fc30 Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_bwd_gamma_beta.hpp.8028572B7299488D.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_naive_variance.hpp.9BD4CEF549374198.idx b/.cache/clangd/index/gridwise_normalization_naive_variance.hpp.9BD4CEF549374198.idx new file mode 100755 index 0000000000000000000000000000000000000000..825ae9834bc7cc5ac4f4cf9c7a12cad70311f14b Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_naive_variance.hpp.9BD4CEF549374198.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_selector.hpp.88C73ECDE0BC86D2.idx b/.cache/clangd/index/gridwise_normalization_selector.hpp.88C73ECDE0BC86D2.idx new file mode 100755 index 0000000000000000000000000000000000000000..ca921cfb64905e498542e641e487ca896c0ce97e Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_selector.hpp.88C73ECDE0BC86D2.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_splitk_1st.hpp.6CD10C30DDA5197D.idx b/.cache/clangd/index/gridwise_normalization_splitk_1st.hpp.6CD10C30DDA5197D.idx new file mode 100755 index 0000000000000000000000000000000000000000..7daeb203c75097d835fdaa995ed9fe406662e2bb Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_splitk_1st.hpp.6CD10C30DDA5197D.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_splitk_2nd.hpp.FE37C91CA5D8EB00.idx b/.cache/clangd/index/gridwise_normalization_splitk_2nd.hpp.FE37C91CA5D8EB00.idx new file mode 100755 index 0000000000000000000000000000000000000000..a16d1b9a62e5c8ad0f6803ceacf97e2b3ded6dc9 Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_splitk_2nd.hpp.FE37C91CA5D8EB00.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_welford_variance.hpp.FE77CD9DBE7C3CF0.idx b/.cache/clangd/index/gridwise_normalization_welford_variance.hpp.FE77CD9DBE7C3CF0.idx new file mode 100755 index 0000000000000000000000000000000000000000..b290bf26d1b8a128b4f8b3e147f9c39d088a4a22 Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_welford_variance.hpp.FE77CD9DBE7C3CF0.idx differ diff --git a/.cache/clangd/index/gridwise_permute.hpp.E425D88E25D439BC.idx b/.cache/clangd/index/gridwise_permute.hpp.E425D88E25D439BC.idx new file mode 100755 index 0000000000000000000000000000000000000000..92605ba22f1302645fcf2722543d4eb07f829ef3 Binary files /dev/null and b/.cache/clangd/index/gridwise_permute.hpp.E425D88E25D439BC.idx differ diff --git a/.cache/clangd/index/gridwise_put_element_1d.hpp.59E84F53787BFD31.idx b/.cache/clangd/index/gridwise_put_element_1d.hpp.59E84F53787BFD31.idx new file mode 100755 index 0000000000000000000000000000000000000000..2d2d044e85bf042e9a466980ec79b19634146392 Binary files /dev/null and b/.cache/clangd/index/gridwise_put_element_1d.hpp.59E84F53787BFD31.idx differ diff --git a/.cache/clangd/index/gridwise_set_buffer_value.hpp.1407258F85A006CD.idx b/.cache/clangd/index/gridwise_set_buffer_value.hpp.1407258F85A006CD.idx new file mode 100755 index 0000000000000000000000000000000000000000..3d89a493cc1da4ffae1a5dd3fa0eac2cd8c23490 Binary files /dev/null and b/.cache/clangd/index/gridwise_set_buffer_value.hpp.1407258F85A006CD.idx differ diff --git a/.cache/clangd/index/gridwise_set_multiple_buffer_value.hpp.77B0D514858D3743.idx b/.cache/clangd/index/gridwise_set_multiple_buffer_value.hpp.77B0D514858D3743.idx new file mode 100755 index 0000000000000000000000000000000000000000..294da0981fa08dd288c8cd1757b5345fd892d725 Binary files /dev/null and b/.cache/clangd/index/gridwise_set_multiple_buffer_value.hpp.77B0D514858D3743.idx differ diff --git a/.cache/clangd/index/gridwise_softmax.hpp.5A728FA36C708E60.idx b/.cache/clangd/index/gridwise_softmax.hpp.5A728FA36C708E60.idx new file mode 100755 index 0000000000000000000000000000000000000000..35ae5d9bb851dc7c31784276f2d52802ebfddaee Binary files /dev/null and b/.cache/clangd/index/gridwise_softmax.hpp.5A728FA36C708E60.idx differ diff --git a/.cache/clangd/index/gridwise_sparse_embeddings_forward_layernorm.hpp.9F9F7C52CFA18397.idx b/.cache/clangd/index/gridwise_sparse_embeddings_forward_layernorm.hpp.9F9F7C52CFA18397.idx new file mode 100755 index 0000000000000000000000000000000000000000..88cb9c2df87a4ecb22fa98b013eb2293e5b16f3f Binary files /dev/null and b/.cache/clangd/index/gridwise_sparse_embeddings_forward_layernorm.hpp.9F9F7C52CFA18397.idx differ diff --git a/.cache/clangd/index/gridwise_tensor_rearrange.hpp.73B250CAB9F4E6DB.idx b/.cache/clangd/index/gridwise_tensor_rearrange.hpp.73B250CAB9F4E6DB.idx new file mode 100755 index 0000000000000000000000000000000000000000..1b6a64470b07ae0b4e9ac33d5bafadf427465529 Binary files /dev/null and b/.cache/clangd/index/gridwise_tensor_rearrange.hpp.73B250CAB9F4E6DB.idx differ diff --git a/.cache/clangd/index/gridwise_welford_second_half_layernorm2d.hpp.52F6FAA1A972E598.idx b/.cache/clangd/index/gridwise_welford_second_half_layernorm2d.hpp.52F6FAA1A972E598.idx new file mode 100755 index 0000000000000000000000000000000000000000..61f1acebc0b1a3e0b0f5a325417f930d26e96af1 Binary files /dev/null and b/.cache/clangd/index/gridwise_welford_second_half_layernorm2d.hpp.52F6FAA1A972E598.idx differ diff --git a/.cache/clangd/index/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp.2CC919AB5B044CF3.idx b/.cache/clangd/index/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp.2CC919AB5B044CF3.idx new file mode 100755 index 0000000000000000000000000000000000000000..01510a6fe4552490ffbe9274eae4dce4619f9ace Binary files /dev/null and b/.cache/clangd/index/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp.2CC919AB5B044CF3.idx differ diff --git a/.cache/clangd/index/grouped_conv_bwd_data_xdl_fp16.cpp.618FF0F87C438165.idx b/.cache/clangd/index/grouped_conv_bwd_data_xdl_fp16.cpp.618FF0F87C438165.idx new file mode 100755 index 0000000000000000000000000000000000000000..74b14dceb66377f3bc3b7157dd6f2d2b8cd7cc98 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_bwd_data_xdl_fp16.cpp.618FF0F87C438165.idx differ diff --git a/.cache/clangd/index/grouped_conv_bwd_weight_xdl_bf16.cpp.3B512EE28C055424.idx b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_bf16.cpp.3B512EE28C055424.idx new file mode 100755 index 0000000000000000000000000000000000000000..b842af19e713123087fa9f936ee1483d9f2ab88c Binary files /dev/null and b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_bf16.cpp.3B512EE28C055424.idx differ diff --git a/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16.cpp.35241A5239C4258D.idx b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16.cpp.35241A5239C4258D.idx new file mode 100755 index 0000000000000000000000000000000000000000..4f89411caf39ad8f89c1fb36e236ce70359e2823 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16.cpp.35241A5239C4258D.idx differ diff --git a/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp.BD38E20DB8B3998F.idx b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp.BD38E20DB8B3998F.idx new file mode 100755 index 0000000000000000000000000000000000000000..27771b24a7378dca1ab2a22a5f484cc595887991 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp.BD38E20DB8B3998F.idx differ diff --git a/.cache/clangd/index/grouped_conv_conv_fwd_xdl_bf16.cpp.2A5915DD526E33D4.idx b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_bf16.cpp.2A5915DD526E33D4.idx new file mode 100755 index 0000000000000000000000000000000000000000..0784ff7125a1e255559fd6b4a679ec57e61b0646 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_bf16.cpp.2A5915DD526E33D4.idx differ diff --git a/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp16.cpp.F0393DD4FF0F531D.idx b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp16.cpp.F0393DD4FF0F531D.idx new file mode 100755 index 0000000000000000000000000000000000000000..d48c20886a0efc558de70e081d3660d5edeb72db Binary files /dev/null and b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp16.cpp.F0393DD4FF0F531D.idx differ diff --git a/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp32.cpp.329BE83F917399A6.idx b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp32.cpp.329BE83F917399A6.idx new file mode 100755 index 0000000000000000000000000000000000000000..132e34fee53030c83a44cc4e5d75e79936f481fa Binary files /dev/null and b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp32.cpp.329BE83F917399A6.idx differ diff --git a/.cache/clangd/index/grouped_conv_conv_fwd_xdl_int8.cpp.E847303767D0E187.idx b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_int8.cpp.E847303767D0E187.idx new file mode 100755 index 0000000000000000000000000000000000000000..ff226374186700ca4347ce48698c4837f789b2cb Binary files /dev/null and b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_int8.cpp.E847303767D0E187.idx differ diff --git a/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp.E2C08E4AF953E7F0.idx b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp.E2C08E4AF953E7F0.idx new file mode 100755 index 0000000000000000000000000000000000000000..4e188e8fabab391d25c3576ff784b61b8bdea330 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp.E2C08E4AF953E7F0.idx differ diff --git a/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp.9CA386C0ABEF35F6.idx b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp.9CA386C0ABEF35F6.idx new file mode 100755 index 0000000000000000000000000000000000000000..a502ab69f2c259b0021a0949fa7ee1f6937150fa Binary files /dev/null and b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp.9CA386C0ABEF35F6.idx differ diff --git a/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp.77C435750269A2BB.idx b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp.77C435750269A2BB.idx new file mode 100755 index 0000000000000000000000000000000000000000..b2fcfbf15b5aa6c3cd26f309a12326c446d66e11 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp.77C435750269A2BB.idx differ diff --git a/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp.8BC00DE6D83E6AA0.idx b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp.8BC00DE6D83E6AA0.idx new file mode 100755 index 0000000000000000000000000000000000000000..6bc6bead4721f0e7bb607f9c0be52d0bd1e82e90 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp.8BC00DE6D83E6AA0.idx differ diff --git a/.cache/clangd/index/grouped_conv_fwd_xdl_fp16.cpp.F004628471E9028F.idx b/.cache/clangd/index/grouped_conv_fwd_xdl_fp16.cpp.F004628471E9028F.idx new file mode 100755 index 0000000000000000000000000000000000000000..eb93cb35275871df63e45b3f98d948bda45f1657 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_fwd_xdl_fp16.cpp.F004628471E9028F.idx differ diff --git a/.cache/clangd/index/grouped_convolution_backward_data.hpp.04038859D7920D2D.idx b/.cache/clangd/index/grouped_convolution_backward_data.hpp.04038859D7920D2D.idx new file mode 100755 index 0000000000000000000000000000000000000000..912b03ae24777309c0ee71cf5316fed733f09a2e Binary files /dev/null and b/.cache/clangd/index/grouped_convolution_backward_data.hpp.04038859D7920D2D.idx differ diff --git a/.cache/clangd/index/grouped_convolution_backward_weight.hpp.CF4258D0B5FB5557.idx b/.cache/clangd/index/grouped_convolution_backward_weight.hpp.CF4258D0B5FB5557.idx new file mode 100755 index 0000000000000000000000000000000000000000..ca22b0b5b07f00bc4a8df92fb4c2dede5ba4ad10 Binary files /dev/null and b/.cache/clangd/index/grouped_convolution_backward_weight.hpp.CF4258D0B5FB5557.idx differ diff --git a/.cache/clangd/index/grouped_convolution_forward.hpp.3DF8CF239601D4CC.idx b/.cache/clangd/index/grouped_convolution_forward.hpp.3DF8CF239601D4CC.idx new file mode 100755 index 0000000000000000000000000000000000000000..5126d61e64db6c868c4547f0e31294096e16c488 Binary files /dev/null and b/.cache/clangd/index/grouped_convolution_forward.hpp.3DF8CF239601D4CC.idx differ diff --git a/.cache/clangd/index/grouped_gemm.hpp.76EFEC97F28430A5.idx b/.cache/clangd/index/grouped_gemm.hpp.76EFEC97F28430A5.idx new file mode 100755 index 0000000000000000000000000000000000000000..fb85a305bc7c2ccefbffe85eb14c2b1308253e4d Binary files /dev/null and b/.cache/clangd/index/grouped_gemm.hpp.76EFEC97F28430A5.idx differ diff --git a/.cache/clangd/index/grouped_gemm_bias_e_permute_xdl_fp16.cpp.D04ED4238574C098.idx b/.cache/clangd/index/grouped_gemm_bias_e_permute_xdl_fp16.cpp.D04ED4238574C098.idx new file mode 100755 index 0000000000000000000000000000000000000000..94f4af8d8838345c12a9426f1e9e42018cfe4271 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_bias_e_permute_xdl_fp16.cpp.D04ED4238574C098.idx differ diff --git a/.cache/clangd/index/grouped_gemm_fastgelu.hpp.81337D2196A77665.idx b/.cache/clangd/index/grouped_gemm_fastgelu.hpp.81337D2196A77665.idx new file mode 100755 index 0000000000000000000000000000000000000000..9b2bfe44041f69c9fc50d575128790c69a7e7f5a Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_fastgelu.hpp.81337D2196A77665.idx differ diff --git a/.cache/clangd/index/grouped_gemm_fixed_nk.hpp.92D3931E2C242798.idx b/.cache/clangd/index/grouped_gemm_fixed_nk.hpp.92D3931E2C242798.idx new file mode 100755 index 0000000000000000000000000000000000000000..706f325710612f7d3b90ce864e30b40754cc75f4 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_fixed_nk.hpp.92D3931E2C242798.idx differ diff --git a/.cache/clangd/index/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.E01D0ADA6F0061BF.idx b/.cache/clangd/index/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.E01D0ADA6F0061BF.idx new file mode 100755 index 0000000000000000000000000000000000000000..6b084ae928db958ab7a4491d5624c4e63a229e14 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.E01D0ADA6F0061BF.idx differ diff --git a/.cache/clangd/index/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.9FDD703AE6D2D15E.idx b/.cache/clangd/index/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.9FDD703AE6D2D15E.idx new file mode 100755 index 0000000000000000000000000000000000000000..21374b5298fd764b31d5e069472950923741dbc8 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.9FDD703AE6D2D15E.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_bf16.cpp.60909DDCF9BB055C.idx b/.cache/clangd/index/grouped_gemm_xdl_bf16.cpp.60909DDCF9BB055C.idx new file mode 100755 index 0000000000000000000000000000000000000000..e05731409201f13b1b493701de9234c9e10d91ad Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_bf16.cpp.60909DDCF9BB055C.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp.3B6907AB640AF24B.idx b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp.3B6907AB640AF24B.idx new file mode 100755 index 0000000000000000000000000000000000000000..23a6a75d0277e39776f53226bd3865b349fd577d Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp.3B6907AB640AF24B.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp16.cpp.C49639C51E0DF485.idx b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp16.cpp.C49639C51E0DF485.idx new file mode 100755 index 0000000000000000000000000000000000000000..c29908045b41b5c9a0cbc786546032f43a1e9d37 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp16.cpp.C49639C51E0DF485.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp8.cpp.33128DEF03F0DD92.idx b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp8.cpp.33128DEF03F0DD92.idx new file mode 100755 index 0000000000000000000000000000000000000000..7ba6a001911e1919997b94d8dc8688796e0fa394 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp8.cpp.33128DEF03F0DD92.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_fp16.cpp.164525B4D87B1687.idx b/.cache/clangd/index/grouped_gemm_xdl_fp16.cpp.164525B4D87B1687.idx new file mode 100755 index 0000000000000000000000000000000000000000..795190ee066bd4c6001cf00a312483ba39fe5abd Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_fp16.cpp.164525B4D87B1687.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_fp32.cpp.F97F080B232E6F17.idx b/.cache/clangd/index/grouped_gemm_xdl_fp32.cpp.F97F080B232E6F17.idx new file mode 100755 index 0000000000000000000000000000000000000000..18ce02b5d578091f99071e6681dbefb48695a0f6 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_fp32.cpp.F97F080B232E6F17.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_int8.cpp.01E8A20D5ECCA353.idx b/.cache/clangd/index/grouped_gemm_xdl_int8.cpp.01E8A20D5ECCA353.idx new file mode 100755 index 0000000000000000000000000000000000000000..86b63ef023adb0d545bd491243607d8fc1898106 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_int8.cpp.01E8A20D5ECCA353.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_splitk_fp16.cpp.5CCCCCFEB9008060.idx b/.cache/clangd/index/grouped_gemm_xdl_splitk_fp16.cpp.5CCCCCFEB9008060.idx new file mode 100755 index 0000000000000000000000000000000000000000..24606faa17ef91853c3b83444f64de772564007a Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_splitk_fp16.cpp.5CCCCCFEB9008060.idx differ diff --git a/.cache/clangd/index/groupnorm_bwd_data.hpp.3714A9EE70464716.idx b/.cache/clangd/index/groupnorm_bwd_data.hpp.3714A9EE70464716.idx new file mode 100755 index 0000000000000000000000000000000000000000..e329fd2d4983420b281ffc3d9b9e937f3cb55caf Binary files /dev/null and b/.cache/clangd/index/groupnorm_bwd_data.hpp.3714A9EE70464716.idx differ diff --git a/.cache/clangd/index/groupnorm_bwd_fp32.cpp.94FC313C790AEF66.idx b/.cache/clangd/index/groupnorm_bwd_fp32.cpp.94FC313C790AEF66.idx new file mode 100755 index 0000000000000000000000000000000000000000..1308e7be6001b9ad1456fa17dc0167e06b51c26b Binary files /dev/null and b/.cache/clangd/index/groupnorm_bwd_fp32.cpp.94FC313C790AEF66.idx differ diff --git a/.cache/clangd/index/groupnorm_bwd_gamma_beta.hpp.A4BF65E232956D79.idx b/.cache/clangd/index/groupnorm_bwd_gamma_beta.hpp.A4BF65E232956D79.idx new file mode 100755 index 0000000000000000000000000000000000000000..bb8653dea919f2a3b8eaa561e75c90998f73248b Binary files /dev/null and b/.cache/clangd/index/groupnorm_bwd_gamma_beta.hpp.A4BF65E232956D79.idx differ diff --git a/.cache/clangd/index/groupnorm_fwd_sigmoid_mul_fp16.cpp.E1BDC41DAA5F65CC.idx b/.cache/clangd/index/groupnorm_fwd_sigmoid_mul_fp16.cpp.E1BDC41DAA5F65CC.idx new file mode 100755 index 0000000000000000000000000000000000000000..d6f38e12744e4f566b0748450bc21cdca099094a Binary files /dev/null and b/.cache/clangd/index/groupnorm_fwd_sigmoid_mul_fp16.cpp.E1BDC41DAA5F65CC.idx differ diff --git a/.cache/clangd/index/groupnorm_fwd_splitk_fp16.cpp.5CDEA132226AE994.idx b/.cache/clangd/index/groupnorm_fwd_splitk_fp16.cpp.5CDEA132226AE994.idx new file mode 100755 index 0000000000000000000000000000000000000000..28672cfe3bf4d51777b3eaa3687550471fa4bafb Binary files /dev/null and b/.cache/clangd/index/groupnorm_fwd_splitk_fp16.cpp.5CDEA132226AE994.idx differ diff --git a/.cache/clangd/index/groupnorm_fwd_swish_fp16.cpp.FDEC9D120BB249C9.idx b/.cache/clangd/index/groupnorm_fwd_swish_fp16.cpp.FDEC9D120BB249C9.idx new file mode 100755 index 0000000000000000000000000000000000000000..0fe62992ff090339e8933dcd4d546cc9d1577bdf Binary files /dev/null and b/.cache/clangd/index/groupnorm_fwd_swish_fp16.cpp.FDEC9D120BB249C9.idx differ diff --git a/.cache/clangd/index/hip_check_error.hpp.B6401497C46CBC5E.idx b/.cache/clangd/index/hip_check_error.hpp.B6401497C46CBC5E.idx new file mode 100755 index 0000000000000000000000000000000000000000..9a8aea2450cacff7de55e7a9af26fe41d524e499 Binary files /dev/null and b/.cache/clangd/index/hip_check_error.hpp.B6401497C46CBC5E.idx differ diff --git a/.cache/clangd/index/host_common_util.hpp.DD6CA1FC5D70D95B.idx b/.cache/clangd/index/host_common_util.hpp.DD6CA1FC5D70D95B.idx new file mode 100755 index 0000000000000000000000000000000000000000..2e65e35f388af695b55398206647c1d2deca8b99 Binary files /dev/null and b/.cache/clangd/index/host_common_util.hpp.DD6CA1FC5D70D95B.idx differ diff --git a/.cache/clangd/index/host_tensor.cpp.BD60013DE8F91330.idx b/.cache/clangd/index/host_tensor.cpp.BD60013DE8F91330.idx new file mode 100755 index 0000000000000000000000000000000000000000..a83a8bdeeeba5e55bb40fa7bffe6d4e6d3fc10f2 Binary files /dev/null and b/.cache/clangd/index/host_tensor.cpp.BD60013DE8F91330.idx differ diff --git a/.cache/clangd/index/host_tensor.hpp.A2A2489B4F3A4E06.idx b/.cache/clangd/index/host_tensor.hpp.A2A2489B4F3A4E06.idx new file mode 100755 index 0000000000000000000000000000000000000000..62d34570431cd0893ae85e131b3be636844c9241 Binary files /dev/null and b/.cache/clangd/index/host_tensor.hpp.A2A2489B4F3A4E06.idx differ diff --git a/.cache/clangd/index/host_tensor_generator.hpp.44F3F167E8AE0CAB.idx b/.cache/clangd/index/host_tensor_generator.hpp.44F3F167E8AE0CAB.idx new file mode 100755 index 0000000000000000000000000000000000000000..b15a31f9af9223ca21e61ffa217113ccf64357c2 Binary files /dev/null and b/.cache/clangd/index/host_tensor_generator.hpp.44F3F167E8AE0CAB.idx differ diff --git a/.cache/clangd/index/ignore.hpp.67D37926F4126F70.idx b/.cache/clangd/index/ignore.hpp.67D37926F4126F70.idx new file mode 100755 index 0000000000000000000000000000000000000000..15ce55e538ce41b270023778543157ce92c6fafe Binary files /dev/null and b/.cache/clangd/index/ignore.hpp.67D37926F4126F70.idx differ diff --git a/.cache/clangd/index/image_to_column_f32.cpp.E393FC8829D80520.idx b/.cache/clangd/index/image_to_column_f32.cpp.E393FC8829D80520.idx new file mode 100755 index 0000000000000000000000000000000000000000..e0e30170239e51c45661cc71262b267fd7e335ed Binary files /dev/null and b/.cache/clangd/index/image_to_column_f32.cpp.E393FC8829D80520.idx differ diff --git a/.cache/clangd/index/inner_product.hpp.D15AD8CC8FA359B4.idx b/.cache/clangd/index/inner_product.hpp.D15AD8CC8FA359B4.idx new file mode 100755 index 0000000000000000000000000000000000000000..2dc6ce05b1bc4eb3cdb855d56ce650aecafee144 Binary files /dev/null and b/.cache/clangd/index/inner_product.hpp.D15AD8CC8FA359B4.idx differ diff --git a/.cache/clangd/index/inner_product_dpp8.hpp.5615826ED9AC4030.idx b/.cache/clangd/index/inner_product_dpp8.hpp.5615826ED9AC4030.idx new file mode 100755 index 0000000000000000000000000000000000000000..109529eadaa85dfa12810052d8d53d8730d805c5 Binary files /dev/null and b/.cache/clangd/index/inner_product_dpp8.hpp.5615826ED9AC4030.idx differ diff --git a/.cache/clangd/index/integral_constant.hpp.A156C83DCD53E7CE.idx b/.cache/clangd/index/integral_constant.hpp.A156C83DCD53E7CE.idx new file mode 100755 index 0000000000000000000000000000000000000000..a78929738a4fc01357e804f33754aa61a5ceadc4 Binary files /dev/null and b/.cache/clangd/index/integral_constant.hpp.A156C83DCD53E7CE.idx differ diff --git a/.cache/clangd/index/io.hpp.27CD569BA11007AA.idx b/.cache/clangd/index/io.hpp.27CD569BA11007AA.idx new file mode 100755 index 0000000000000000000000000000000000000000..110000ff4319187ef9467d3d8ce21d7ab8298f8e Binary files /dev/null and b/.cache/clangd/index/io.hpp.27CD569BA11007AA.idx differ diff --git a/.cache/clangd/index/is_detected.hpp.993129C9FD0FB446.idx b/.cache/clangd/index/is_detected.hpp.993129C9FD0FB446.idx new file mode 100755 index 0000000000000000000000000000000000000000..32c81bd745d8c8f4ebe709df5fb7afecbeab8dda Binary files /dev/null and b/.cache/clangd/index/is_detected.hpp.993129C9FD0FB446.idx differ diff --git a/.cache/clangd/index/is_known_at_compile_time.hpp.C66FF603A6D2FF7E.idx b/.cache/clangd/index/is_known_at_compile_time.hpp.C66FF603A6D2FF7E.idx new file mode 100755 index 0000000000000000000000000000000000000000..2b939712fe758f01cde5f0c2d2b6e0a79044194f Binary files /dev/null and b/.cache/clangd/index/is_known_at_compile_time.hpp.C66FF603A6D2FF7E.idx differ diff --git a/.cache/clangd/index/iterator.hpp.DDB0074CE39B229F.idx b/.cache/clangd/index/iterator.hpp.DDB0074CE39B229F.idx new file mode 100755 index 0000000000000000000000000000000000000000..a1c2cef69738048e8a49971db95c444033c9882b Binary files /dev/null and b/.cache/clangd/index/iterator.hpp.DDB0074CE39B229F.idx differ diff --git a/.cache/clangd/index/kernel_launch.hpp.4FE4793F82580CE2.idx b/.cache/clangd/index/kernel_launch.hpp.4FE4793F82580CE2.idx new file mode 100755 index 0000000000000000000000000000000000000000..1892ce568ba300b323cc966066c9227b76783126 Binary files /dev/null and b/.cache/clangd/index/kernel_launch.hpp.4FE4793F82580CE2.idx differ diff --git a/.cache/clangd/index/kernel_utils.hpp.A2D72F83F06BC2A4.idx b/.cache/clangd/index/kernel_utils.hpp.A2D72F83F06BC2A4.idx new file mode 100755 index 0000000000000000000000000000000000000000..901aa0396e9ad4310872f8124cc5132262b85dad Binary files /dev/null and b/.cache/clangd/index/kernel_utils.hpp.A2D72F83F06BC2A4.idx differ diff --git a/.cache/clangd/index/km_kn_mn_add_instance.cpp.5CD53C62DE24ED15.idx b/.cache/clangd/index/km_kn_mn_add_instance.cpp.5CD53C62DE24ED15.idx new file mode 100755 index 0000000000000000000000000000000000000000..fabaa1ec80a2221b1ace8b56eb8315ce4e2c2382 Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_add_instance.cpp.5CD53C62DE24ED15.idx differ diff --git a/.cache/clangd/index/km_kn_mn_default_pipeline_v1_instance.cpp.2F582983A57414AF.idx b/.cache/clangd/index/km_kn_mn_default_pipeline_v1_instance.cpp.2F582983A57414AF.idx new file mode 100755 index 0000000000000000000000000000000000000000..64874d79d7aac87aa6aaea98bcfb6481178caf5b Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_default_pipeline_v1_instance.cpp.2F582983A57414AF.idx differ diff --git a/.cache/clangd/index/km_kn_mn_default_pipeline_v2_instance.cpp.FDC41D8D362237D7.idx b/.cache/clangd/index/km_kn_mn_default_pipeline_v2_instance.cpp.FDC41D8D362237D7.idx new file mode 100755 index 0000000000000000000000000000000000000000..6afb5fd2b2ee078a1195c978618aa5b5ba58d486 Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_default_pipeline_v2_instance.cpp.FDC41D8D362237D7.idx differ diff --git a/.cache/clangd/index/km_kn_mn_default_pipeline_v2_opt_instance.cpp.9D0F889F526341B0.idx b/.cache/clangd/index/km_kn_mn_default_pipeline_v2_opt_instance.cpp.9D0F889F526341B0.idx new file mode 100755 index 0000000000000000000000000000000000000000..a4604b8d50abae449e9a299ac2e62a7299f56299 Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_default_pipeline_v2_opt_instance.cpp.9D0F889F526341B0.idx differ diff --git a/.cache/clangd/index/km_kn_mn_interwave_pipeline_v1_instance.cpp.536F114F5BA9349C.idx b/.cache/clangd/index/km_kn_mn_interwave_pipeline_v1_instance.cpp.536F114F5BA9349C.idx new file mode 100755 index 0000000000000000000000000000000000000000..da0a6e851430b25aa4be1c542d58c40988209947 Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_interwave_pipeline_v1_instance.cpp.536F114F5BA9349C.idx differ diff --git a/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v1_instance.cpp.5937911B0257B674.idx b/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v1_instance.cpp.5937911B0257B674.idx new file mode 100755 index 0000000000000000000000000000000000000000..001ed6cbef886fdedb48a1da89ba71f68cd9a46c Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v1_instance.cpp.5937911B0257B674.idx differ diff --git a/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v2_instance.cpp.0318E39A03EE883F.idx b/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v2_instance.cpp.0318E39A03EE883F.idx new file mode 100755 index 0000000000000000000000000000000000000000..54f5f1af2c7da54642fcc71df24bd32aaea930ed Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v2_instance.cpp.0318E39A03EE883F.idx differ diff --git a/.cache/clangd/index/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.0A7ACF40DAB7236A.idx b/.cache/clangd/index/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.0A7ACF40DAB7236A.idx new file mode 100755 index 0000000000000000000000000000000000000000..cf87f69f3a46c9bab0da6339671f843f2b33ebd8 Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.0A7ACF40DAB7236A.idx differ diff --git a/.cache/clangd/index/km_nk_mn_add_instance.cpp.09C78F4C6D71CC59.idx b/.cache/clangd/index/km_nk_mn_add_instance.cpp.09C78F4C6D71CC59.idx new file mode 100755 index 0000000000000000000000000000000000000000..ea2909a4d6f79ba2e871af0267911ff3db6848bd Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_add_instance.cpp.09C78F4C6D71CC59.idx differ diff --git a/.cache/clangd/index/km_nk_mn_default_pipeline_v1_instance.cpp.7011152E4CED8319.idx b/.cache/clangd/index/km_nk_mn_default_pipeline_v1_instance.cpp.7011152E4CED8319.idx new file mode 100755 index 0000000000000000000000000000000000000000..a8e6d099799df6ec2af6a25f6c7be03c95c98270 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_default_pipeline_v1_instance.cpp.7011152E4CED8319.idx differ diff --git a/.cache/clangd/index/km_nk_mn_default_pipeline_v2_instance.cpp.4ADB4E587F42B7E9.idx b/.cache/clangd/index/km_nk_mn_default_pipeline_v2_instance.cpp.4ADB4E587F42B7E9.idx new file mode 100755 index 0000000000000000000000000000000000000000..2bc4f6e6d80cf6a6de054a2716afadf942f4495d Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_default_pipeline_v2_instance.cpp.4ADB4E587F42B7E9.idx differ diff --git a/.cache/clangd/index/km_nk_mn_default_pipeline_v2_opt_instance.cpp.AA43A9C5EED3E40A.idx b/.cache/clangd/index/km_nk_mn_default_pipeline_v2_opt_instance.cpp.AA43A9C5EED3E40A.idx new file mode 100755 index 0000000000000000000000000000000000000000..266bfcb20bbaaff7e5580eb073425679bf3c7c59 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_default_pipeline_v2_opt_instance.cpp.AA43A9C5EED3E40A.idx differ diff --git a/.cache/clangd/index/km_nk_mn_interwave_pipeline_v1_instance.cpp.37A32BC5170A6461.idx b/.cache/clangd/index/km_nk_mn_interwave_pipeline_v1_instance.cpp.37A32BC5170A6461.idx new file mode 100755 index 0000000000000000000000000000000000000000..883704a19213b3151fd9b634cfd8cc413ff35d70 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_interwave_pipeline_v1_instance.cpp.37A32BC5170A6461.idx differ diff --git a/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v1_instance.cpp.322AFE9D596702FC.idx b/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v1_instance.cpp.322AFE9D596702FC.idx new file mode 100755 index 0000000000000000000000000000000000000000..6e03792a3c9b07e6ad8affbcd82d376013d8c2b0 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v1_instance.cpp.322AFE9D596702FC.idx differ diff --git a/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v2_instance.cpp.6202EDA536A25A77.idx b/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v2_instance.cpp.6202EDA536A25A77.idx new file mode 100755 index 0000000000000000000000000000000000000000..9b8b3cf3c45c627988afa12bb4fbd9cb025d32d1 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v2_instance.cpp.6202EDA536A25A77.idx differ diff --git a/.cache/clangd/index/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.667185EB457E88CA.idx b/.cache/clangd/index/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.667185EB457E88CA.idx new file mode 100755 index 0000000000000000000000000000000000000000..401eea1ca2dc72bcb581ad1d854ba70123f26f76 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.667185EB457E88CA.idx differ diff --git a/.cache/clangd/index/layernorm2d_bwd_fp32.cpp.7066935C4C40FCB0.idx b/.cache/clangd/index/layernorm2d_bwd_fp32.cpp.7066935C4C40FCB0.idx new file mode 100755 index 0000000000000000000000000000000000000000..fce3b22bd6e8272c9743ad31385a5193b61295a9 Binary files /dev/null and b/.cache/clangd/index/layernorm2d_bwd_fp32.cpp.7066935C4C40FCB0.idx differ diff --git a/.cache/clangd/index/layernorm2d_fwd_fp16.cpp.6079EB647B2C4778.idx b/.cache/clangd/index/layernorm2d_fwd_fp16.cpp.6079EB647B2C4778.idx new file mode 100755 index 0000000000000000000000000000000000000000..91828e7461c3e88ecf399c9fb5d99aa4ad98c06c Binary files /dev/null and b/.cache/clangd/index/layernorm2d_fwd_fp16.cpp.6079EB647B2C4778.idx differ diff --git a/.cache/clangd/index/layernorm2d_fwd_splitk_fp16.cpp.7A8CCC9ADABCC2A0.idx b/.cache/clangd/index/layernorm2d_fwd_splitk_fp16.cpp.7A8CCC9ADABCC2A0.idx new file mode 100755 index 0000000000000000000000000000000000000000..5c34b78667a16ea09bc1e3e495d9d52b64b7a257 Binary files /dev/null and b/.cache/clangd/index/layernorm2d_fwd_splitk_fp16.cpp.7A8CCC9ADABCC2A0.idx differ diff --git a/.cache/clangd/index/layernorm4d_fwd_fp16.cpp.FF678597E26B24C3.idx b/.cache/clangd/index/layernorm4d_fwd_fp16.cpp.FF678597E26B24C3.idx new file mode 100755 index 0000000000000000000000000000000000000000..a9c9e329b71c016dae335ec284875c4d6df1ec88 Binary files /dev/null and b/.cache/clangd/index/layernorm4d_fwd_fp16.cpp.FF678597E26B24C3.idx differ diff --git a/.cache/clangd/index/layernorm4d_fwd_splitk_fp16.cpp.BF340D61393FB17F.idx b/.cache/clangd/index/layernorm4d_fwd_splitk_fp16.cpp.BF340D61393FB17F.idx new file mode 100755 index 0000000000000000000000000000000000000000..15bd7b2915c59a266c3622ae92074f977729b02c Binary files /dev/null and b/.cache/clangd/index/layernorm4d_fwd_splitk_fp16.cpp.BF340D61393FB17F.idx differ diff --git a/.cache/clangd/index/layernorm_bwd_data.hpp.D81DD3681C23DD68.idx b/.cache/clangd/index/layernorm_bwd_data.hpp.D81DD3681C23DD68.idx new file mode 100755 index 0000000000000000000000000000000000000000..570dd29f0b939a2533eef1fb94b72f1b091a5cb5 Binary files /dev/null and b/.cache/clangd/index/layernorm_bwd_data.hpp.D81DD3681C23DD68.idx differ diff --git a/.cache/clangd/index/layernorm_bwd_gamma_beta.hpp.8CC35A2FBD2934F9.idx b/.cache/clangd/index/layernorm_bwd_gamma_beta.hpp.8CC35A2FBD2934F9.idx new file mode 100755 index 0000000000000000000000000000000000000000..a8dc6369b10353d5a15c953445cc64ffc7ef8097 Binary files /dev/null and b/.cache/clangd/index/layernorm_bwd_gamma_beta.hpp.8CC35A2FBD2934F9.idx differ diff --git a/.cache/clangd/index/layout.hpp.08D624D3F8E5BFE6.idx b/.cache/clangd/index/layout.hpp.08D624D3F8E5BFE6.idx new file mode 100755 index 0000000000000000000000000000000000000000..aaa21cca052ea0f464ef05704c87ab13c9fbae2c Binary files /dev/null and b/.cache/clangd/index/layout.hpp.08D624D3F8E5BFE6.idx differ diff --git a/.cache/clangd/index/layout_utils.hpp.9A8926719F7B5577.idx b/.cache/clangd/index/layout_utils.hpp.9A8926719F7B5577.idx new file mode 100755 index 0000000000000000000000000000000000000000..54325797bb4c68189546b8231295e0b3aae04a7e Binary files /dev/null and b/.cache/clangd/index/layout_utils.hpp.9A8926719F7B5577.idx differ diff --git a/.cache/clangd/index/literals.hpp.AC03AD34F59B947A.idx b/.cache/clangd/index/literals.hpp.AC03AD34F59B947A.idx new file mode 100755 index 0000000000000000000000000000000000000000..1446589e3aa9c21e1301752b7c5915a4a786ff6b Binary files /dev/null and b/.cache/clangd/index/literals.hpp.AC03AD34F59B947A.idx differ diff --git a/.cache/clangd/index/loop_scheduler.hpp.CEBBF3BFF5C82A80.idx b/.cache/clangd/index/loop_scheduler.hpp.CEBBF3BFF5C82A80.idx new file mode 100755 index 0000000000000000000000000000000000000000..d08b4b335e29949b24824a5c3209022f90e6f98c Binary files /dev/null and b/.cache/clangd/index/loop_scheduler.hpp.CEBBF3BFF5C82A80.idx differ diff --git a/.cache/clangd/index/magic_division.hpp.7DE3E22991AC58AE.idx b/.cache/clangd/index/magic_division.hpp.7DE3E22991AC58AE.idx new file mode 100755 index 0000000000000000000000000000000000000000..db07d13b8dd2579d1fc92b860f6cd56e7164bcde Binary files /dev/null and b/.cache/clangd/index/magic_division.hpp.7DE3E22991AC58AE.idx differ diff --git a/.cache/clangd/index/magic_number_division.cpp.853FDB10989ECFC7.idx b/.cache/clangd/index/magic_number_division.cpp.853FDB10989ECFC7.idx new file mode 100755 index 0000000000000000000000000000000000000000..47987dc360adefd27e40fe1bc27e27fa3ae68617 Binary files /dev/null and b/.cache/clangd/index/magic_number_division.cpp.853FDB10989ECFC7.idx differ diff --git a/.cache/clangd/index/masking_specialization.hpp.2AE80C7347AB00C4.idx b/.cache/clangd/index/masking_specialization.hpp.2AE80C7347AB00C4.idx new file mode 100755 index 0000000000000000000000000000000000000000..7a7dd984d152df06aacaae21d661711e74eb0d2b Binary files /dev/null and b/.cache/clangd/index/masking_specialization.hpp.2AE80C7347AB00C4.idx differ diff --git a/.cache/clangd/index/math.hpp.234B356D49034450.idx b/.cache/clangd/index/math.hpp.234B356D49034450.idx new file mode 100755 index 0000000000000000000000000000000000000000..65b9a6a97467b6c43cd2cef7f50a6284fd57bd98 Binary files /dev/null and b/.cache/clangd/index/math.hpp.234B356D49034450.idx differ diff --git a/.cache/clangd/index/math_v2.hpp.562B4AF9F5A86873.idx b/.cache/clangd/index/math_v2.hpp.562B4AF9F5A86873.idx new file mode 100755 index 0000000000000000000000000000000000000000..badf43a0d894004a74d31eeb6c63517431933a0d Binary files /dev/null and b/.cache/clangd/index/math_v2.hpp.562B4AF9F5A86873.idx differ diff --git a/.cache/clangd/index/matrix_padder.hpp.0539595BAF18EE86.idx b/.cache/clangd/index/matrix_padder.hpp.0539595BAF18EE86.idx new file mode 100755 index 0000000000000000000000000000000000000000..0c57d494039b8318c6ad1af044e1ee042d585f46 Binary files /dev/null and b/.cache/clangd/index/matrix_padder.hpp.0539595BAF18EE86.idx differ diff --git a/.cache/clangd/index/max_pool_bwd.hpp.4F15616E2BF6E596.idx b/.cache/clangd/index/max_pool_bwd.hpp.4F15616E2BF6E596.idx new file mode 100755 index 0000000000000000000000000000000000000000..00b1494d99571ea9b93f6f8a255dc1fe901d67bc Binary files /dev/null and b/.cache/clangd/index/max_pool_bwd.hpp.4F15616E2BF6E596.idx differ diff --git a/.cache/clangd/index/max_pool_bwd_instance_common.hpp.F8135D216BA19663.idx b/.cache/clangd/index/max_pool_bwd_instance_common.hpp.F8135D216BA19663.idx new file mode 100755 index 0000000000000000000000000000000000000000..08da602c1a76b688e85e89e1ffe566d03a218f94 Binary files /dev/null and b/.cache/clangd/index/max_pool_bwd_instance_common.hpp.F8135D216BA19663.idx differ diff --git a/.cache/clangd/index/maxpool2d_bwd_bf16.cpp.E133A720E87AC796.idx b/.cache/clangd/index/maxpool2d_bwd_bf16.cpp.E133A720E87AC796.idx new file mode 100755 index 0000000000000000000000000000000000000000..52057a21a24bb755d92533cb1219ab77ada9077a Binary files /dev/null and b/.cache/clangd/index/maxpool2d_bwd_bf16.cpp.E133A720E87AC796.idx differ diff --git a/.cache/clangd/index/maxpool2d_bwd_common.hpp.4213849B15309F3C.idx b/.cache/clangd/index/maxpool2d_bwd_common.hpp.4213849B15309F3C.idx new file mode 100755 index 0000000000000000000000000000000000000000..6b0173b789316938c2fd41b6dca8a2f13a37c6bb Binary files /dev/null and b/.cache/clangd/index/maxpool2d_bwd_common.hpp.4213849B15309F3C.idx differ diff --git a/.cache/clangd/index/maxpool2d_bwd_fp16.cpp.AA5B517C73C2030C.idx b/.cache/clangd/index/maxpool2d_bwd_fp16.cpp.AA5B517C73C2030C.idx new file mode 100755 index 0000000000000000000000000000000000000000..5a476c72ff119b91b9290a778b342701d5203ac9 Binary files /dev/null and b/.cache/clangd/index/maxpool2d_bwd_fp16.cpp.AA5B517C73C2030C.idx differ diff --git a/.cache/clangd/index/maxpool2d_bwd_fp32.cpp.3CE2CB1F186065DE.idx b/.cache/clangd/index/maxpool2d_bwd_fp32.cpp.3CE2CB1F186065DE.idx new file mode 100755 index 0000000000000000000000000000000000000000..b4ac47326dca4fd23d61a2cdfef940061789f103 Binary files /dev/null and b/.cache/clangd/index/maxpool2d_bwd_fp32.cpp.3CE2CB1F186065DE.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_add_instance.cpp.F2B6A1DC94E17C44.idx b/.cache/clangd/index/mk_kn_mn_add_instance.cpp.F2B6A1DC94E17C44.idx new file mode 100755 index 0000000000000000000000000000000000000000..3e05c05848c6c9e2681b7a6159b0b9e622bda9a6 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_add_instance.cpp.F2B6A1DC94E17C44.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_default_pipeline_v1_instance.cpp.B364344C594966A7.idx b/.cache/clangd/index/mk_kn_mn_default_pipeline_v1_instance.cpp.B364344C594966A7.idx new file mode 100755 index 0000000000000000000000000000000000000000..d328b71dd41c5e7d84837faa2b7688faf126bec9 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_default_pipeline_v1_instance.cpp.B364344C594966A7.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_instance.cpp.A590A56B0084D88A.idx b/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_instance.cpp.A590A56B0084D88A.idx new file mode 100755 index 0000000000000000000000000000000000000000..7a721ab32f09214433e04954e653f563f7070709 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_instance.cpp.A590A56B0084D88A.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_opt_instance.cpp.7FA7EE3992571786.idx b/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_opt_instance.cpp.7FA7EE3992571786.idx new file mode 100755 index 0000000000000000000000000000000000000000..73c8cc8d98265d79be672749dd881b474e416c36 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_opt_instance.cpp.7FA7EE3992571786.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_interwave_pipeline_v1_instance.cpp.E84C4EA0690C6426.idx b/.cache/clangd/index/mk_kn_mn_interwave_pipeline_v1_instance.cpp.E84C4EA0690C6426.idx new file mode 100755 index 0000000000000000000000000000000000000000..5e799a3bf3f954eab573332ecfdf3f4e8b973491 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_interwave_pipeline_v1_instance.cpp.E84C4EA0690C6426.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp.1D02066DCD48924F.idx b/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp.1D02066DCD48924F.idx new file mode 100755 index 0000000000000000000000000000000000000000..a59a645844991c13616c159b30285d60ed3908fa Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp.1D02066DCD48924F.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp.DFAE93983C514BC6.idx b/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp.DFAE93983C514BC6.idx new file mode 100755 index 0000000000000000000000000000000000000000..c97f6357a2e99cddaf715aef834050dddd805165 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp.DFAE93983C514BC6.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.905D27CAAE2E6FF3.idx b/.cache/clangd/index/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.905D27CAAE2E6FF3.idx new file mode 100755 index 0000000000000000000000000000000000000000..74a955f0bd06d69aff8a8cbab432d16c182fc148 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.905D27CAAE2E6FF3.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_add_instance.cpp.0D22D3DFBDE4264A.idx b/.cache/clangd/index/mk_nk_mn_add_instance.cpp.0D22D3DFBDE4264A.idx new file mode 100755 index 0000000000000000000000000000000000000000..0fe49d8d3b95ed074c19764337924d437e0038a4 Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_add_instance.cpp.0D22D3DFBDE4264A.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_default_pipeline_v1_instance.cpp.FC2B955D88E3500C.idx b/.cache/clangd/index/mk_nk_mn_default_pipeline_v1_instance.cpp.FC2B955D88E3500C.idx new file mode 100755 index 0000000000000000000000000000000000000000..094a38e903d334e64c95a2e9cb1e90e150713b82 Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_default_pipeline_v1_instance.cpp.FC2B955D88E3500C.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_instance.cpp.AB0F8B7DC8CEF96B.idx b/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_instance.cpp.AB0F8B7DC8CEF96B.idx new file mode 100755 index 0000000000000000000000000000000000000000..c08f7ebfd7a7e9b7d8f1e6b1b812a3f350342d4c Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_instance.cpp.AB0F8B7DC8CEF96B.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_opt_instance.cpp.38A404E476FFFA7A.idx b/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_opt_instance.cpp.38A404E476FFFA7A.idx new file mode 100755 index 0000000000000000000000000000000000000000..a1b8712efc14f79217c0ba8dd11c6b750626ab0e Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_opt_instance.cpp.38A404E476FFFA7A.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_interwave_pipeline_v1_instance.cpp.1B80E7B3E985B173.idx b/.cache/clangd/index/mk_nk_mn_interwave_pipeline_v1_instance.cpp.1B80E7B3E985B173.idx new file mode 100755 index 0000000000000000000000000000000000000000..b9ffd374274f7c787c24863ae5420d254924e7cb Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_interwave_pipeline_v1_instance.cpp.1B80E7B3E985B173.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp.8AF96AFDCF49C20F.idx b/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp.8AF96AFDCF49C20F.idx new file mode 100755 index 0000000000000000000000000000000000000000..2b5a67557dc6355bff92928c4df07f18462a8ad7 Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp.8AF96AFDCF49C20F.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp.AD25F4CBEC60DDF2.idx b/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp.AD25F4CBEC60DDF2.idx new file mode 100755 index 0000000000000000000000000000000000000000..b11e71dac6f6ea798ecd4b59ea87862865f23b53 Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp.AD25F4CBEC60DDF2.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.EB9B1319190E914D.idx b/.cache/clangd/index/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.EB9B1319190E914D.idx new file mode 100755 index 0000000000000000000000000000000000000000..b2d7fddb1b59ec4b013aa01241194a5c4d9ca2c1 Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.EB9B1319190E914D.idx differ diff --git a/.cache/clangd/index/multi_index.hpp.46D763C8845699EA.idx b/.cache/clangd/index/multi_index.hpp.46D763C8845699EA.idx new file mode 100755 index 0000000000000000000000000000000000000000..599eeeb37f4ee839c85a375b7b68db9bbd85052f Binary files /dev/null and b/.cache/clangd/index/multi_index.hpp.46D763C8845699EA.idx differ diff --git a/.cache/clangd/index/multi_index_transform.hpp.97E44055DBD0AE25.idx b/.cache/clangd/index/multi_index_transform.hpp.97E44055DBD0AE25.idx new file mode 100755 index 0000000000000000000000000000000000000000..f23b7750e27c8d61f45965b198c3a9621572bf8e Binary files /dev/null and b/.cache/clangd/index/multi_index_transform.hpp.97E44055DBD0AE25.idx differ diff --git a/.cache/clangd/index/multi_index_transform_helper.hpp.7CC4F9B71E8A7CAA.idx b/.cache/clangd/index/multi_index_transform_helper.hpp.7CC4F9B71E8A7CAA.idx new file mode 100755 index 0000000000000000000000000000000000000000..e346249d06d4c5cdfb0b1a45976e7ddc93089604 Binary files /dev/null and b/.cache/clangd/index/multi_index_transform_helper.hpp.7CC4F9B71E8A7CAA.idx differ diff --git a/.cache/clangd/index/normalization_bwd_data_instance_common.hpp.3AFBFA5B5298FD17.idx b/.cache/clangd/index/normalization_bwd_data_instance_common.hpp.3AFBFA5B5298FD17.idx new file mode 100755 index 0000000000000000000000000000000000000000..b23ae11376072545e8aa4332d34a43ac9289b124 Binary files /dev/null and b/.cache/clangd/index/normalization_bwd_data_instance_common.hpp.3AFBFA5B5298FD17.idx differ diff --git a/.cache/clangd/index/normalization_bwd_gamma_beta_instance_common.hpp.84C16E4EE625B892.idx b/.cache/clangd/index/normalization_bwd_gamma_beta_instance_common.hpp.84C16E4EE625B892.idx new file mode 100755 index 0000000000000000000000000000000000000000..e3664d2a8ad52c7bd1114eca7c2a07ef7f0308b9 Binary files /dev/null and b/.cache/clangd/index/normalization_bwd_gamma_beta_instance_common.hpp.84C16E4EE625B892.idx differ diff --git a/.cache/clangd/index/normalization_fwd.hpp.C0D3E8108CC38C5D.idx b/.cache/clangd/index/normalization_fwd.hpp.C0D3E8108CC38C5D.idx new file mode 100755 index 0000000000000000000000000000000000000000..e6dac2397a662086464b7533b5ae4abb98ffe910 Binary files /dev/null and b/.cache/clangd/index/normalization_fwd.hpp.C0D3E8108CC38C5D.idx differ diff --git a/.cache/clangd/index/normalization_fwd_instance_common.hpp.38A39908E71BFB61.idx b/.cache/clangd/index/normalization_fwd_instance_common.hpp.38A39908E71BFB61.idx new file mode 100755 index 0000000000000000000000000000000000000000..821cc2330efb068f6a7bd8cd28f78b0f8deeadc0 Binary files /dev/null and b/.cache/clangd/index/normalization_fwd_instance_common.hpp.38A39908E71BFB61.idx differ diff --git a/.cache/clangd/index/number.hpp.25A61C6490A403AE.idx b/.cache/clangd/index/number.hpp.25A61C6490A403AE.idx new file mode 100755 index 0000000000000000000000000000000000000000..fa72a7b0af8f98e6e655fe174f101475be930322 Binary files /dev/null and b/.cache/clangd/index/number.hpp.25A61C6490A403AE.idx differ diff --git a/.cache/clangd/index/numeric.hpp.E1739325ACADC629.idx b/.cache/clangd/index/numeric.hpp.E1739325ACADC629.idx new file mode 100755 index 0000000000000000000000000000000000000000..91e8596204629f302e6ff059de29ff9e42a3af91 Binary files /dev/null and b/.cache/clangd/index/numeric.hpp.E1739325ACADC629.idx differ diff --git a/.cache/clangd/index/permute_1xHxW_fp16.cpp.5780304278E72678.idx b/.cache/clangd/index/permute_1xHxW_fp16.cpp.5780304278E72678.idx new file mode 100755 index 0000000000000000000000000000000000000000..ad3b77bf3951ab9d0694ba32cbc7e977d7dd025d Binary files /dev/null and b/.cache/clangd/index/permute_1xHxW_fp16.cpp.5780304278E72678.idx differ diff --git a/.cache/clangd/index/permute_HxWx4_fp16.cpp.FCA2351A6D6E3584.idx b/.cache/clangd/index/permute_HxWx4_fp16.cpp.FCA2351A6D6E3584.idx new file mode 100755 index 0000000000000000000000000000000000000000..989ea697de84ba54c2efc7265caf0abf7804c5db Binary files /dev/null and b/.cache/clangd/index/permute_HxWx4_fp16.cpp.FCA2351A6D6E3584.idx differ diff --git a/.cache/clangd/index/permute_NxHxW_fp16.cpp.6FE6AEAF71058087.idx b/.cache/clangd/index/permute_NxHxW_fp16.cpp.6FE6AEAF71058087.idx new file mode 100755 index 0000000000000000000000000000000000000000..00d66a9b695063e1fc21b838bbb983137863080e Binary files /dev/null and b/.cache/clangd/index/permute_NxHxW_fp16.cpp.6FE6AEAF71058087.idx differ diff --git a/.cache/clangd/index/permute_scale.hpp.CFE8A4ABEDAA7993.idx b/.cache/clangd/index/permute_scale.hpp.CFE8A4ABEDAA7993.idx new file mode 100755 index 0000000000000000000000000000000000000000..8d2b410014e692d75cb7fcec5df1e8595592834e Binary files /dev/null and b/.cache/clangd/index/permute_scale.hpp.CFE8A4ABEDAA7993.idx differ diff --git a/.cache/clangd/index/pool2d_fwd_common.hpp.3724B9CBDE79A373.idx b/.cache/clangd/index/pool2d_fwd_common.hpp.3724B9CBDE79A373.idx new file mode 100755 index 0000000000000000000000000000000000000000..338bc7330ac800976e94dfc043b82bf1af9b8300 Binary files /dev/null and b/.cache/clangd/index/pool2d_fwd_common.hpp.3724B9CBDE79A373.idx differ diff --git a/.cache/clangd/index/pool2d_fwd_fp16.cpp.09424A28FF97D19C.idx b/.cache/clangd/index/pool2d_fwd_fp16.cpp.09424A28FF97D19C.idx new file mode 100755 index 0000000000000000000000000000000000000000..9d0aff7c6a125cf0cf48dc38b3dff85d669c3f0a Binary files /dev/null and b/.cache/clangd/index/pool2d_fwd_fp16.cpp.09424A28FF97D19C.idx differ diff --git a/.cache/clangd/index/pool2d_fwd_fp32.cpp.1AF35A327BA2145F.idx b/.cache/clangd/index/pool2d_fwd_fp32.cpp.1AF35A327BA2145F.idx new file mode 100755 index 0000000000000000000000000000000000000000..21940e2cbbaaf71a787e1aff3fe35357d81b7fc4 Binary files /dev/null and b/.cache/clangd/index/pool2d_fwd_fp32.cpp.1AF35A327BA2145F.idx differ diff --git a/.cache/clangd/index/pool3d_fwd.hpp.32DAF50D80F54A30.idx b/.cache/clangd/index/pool3d_fwd.hpp.32DAF50D80F54A30.idx new file mode 100755 index 0000000000000000000000000000000000000000..e753637edaf38734d1e1a08b4e81a09b81fe1ed2 Binary files /dev/null and b/.cache/clangd/index/pool3d_fwd.hpp.32DAF50D80F54A30.idx differ diff --git a/.cache/clangd/index/pool3d_fwd_common.hpp.EE0EEEB8E82E75A0.idx b/.cache/clangd/index/pool3d_fwd_common.hpp.EE0EEEB8E82E75A0.idx new file mode 100755 index 0000000000000000000000000000000000000000..634cf91401295cfe6ea5e77768d7829582df1824 Binary files /dev/null and b/.cache/clangd/index/pool3d_fwd_common.hpp.EE0EEEB8E82E75A0.idx differ diff --git a/.cache/clangd/index/pool3d_fwd_fp16.cpp.176B982A3BF061DE.idx b/.cache/clangd/index/pool3d_fwd_fp16.cpp.176B982A3BF061DE.idx new file mode 100755 index 0000000000000000000000000000000000000000..605fb0dd7fe4b90f6a731d502846ab4a93112788 Binary files /dev/null and b/.cache/clangd/index/pool3d_fwd_fp16.cpp.176B982A3BF061DE.idx differ diff --git a/.cache/clangd/index/pool_fwd_instance_common.hpp.4DC931E3BBF2D1BA.idx b/.cache/clangd/index/pool_fwd_instance_common.hpp.4DC931E3BBF2D1BA.idx new file mode 100755 index 0000000000000000000000000000000000000000..d9800f6acd29a0731f8a751eeda946751dfe576e Binary files /dev/null and b/.cache/clangd/index/pool_fwd_instance_common.hpp.4DC931E3BBF2D1BA.idx differ diff --git a/.cache/clangd/index/profile_avg_pool3d_bwd.cpp.4403F8C1766CA1E8.idx b/.cache/clangd/index/profile_avg_pool3d_bwd.cpp.4403F8C1766CA1E8.idx new file mode 100755 index 0000000000000000000000000000000000000000..694bf2785331f601c35b171cbed230816646d2d3 Binary files /dev/null and b/.cache/clangd/index/profile_avg_pool3d_bwd.cpp.4403F8C1766CA1E8.idx differ diff --git a/.cache/clangd/index/profile_avg_pool3d_bwd_impl.hpp.C8240F2385534214.idx b/.cache/clangd/index/profile_avg_pool3d_bwd_impl.hpp.C8240F2385534214.idx new file mode 100755 index 0000000000000000000000000000000000000000..d47a808cd0706310fbbd2b5cd21b3600b8845dc9 Binary files /dev/null and b/.cache/clangd/index/profile_avg_pool3d_bwd_impl.hpp.C8240F2385534214.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm.cpp.8A238D0EF3B6DB52.idx b/.cache/clangd/index/profile_batched_gemm.cpp.8A238D0EF3B6DB52.idx new file mode 100755 index 0000000000000000000000000000000000000000..eb1b44c88a963e02905d9f160f423d3e2649f11a Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm.cpp.8A238D0EF3B6DB52.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add.cpp.CEB2ADB0FF2D0A07.idx b/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add.cpp.CEB2ADB0FF2D0A07.idx new file mode 100755 index 0000000000000000000000000000000000000000..c6b382c5df4137591dc6c748884528566982280d Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add.cpp.CEB2ADB0FF2D0A07.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add_impl.hpp.CC900C93F3C48179.idx b/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add_impl.hpp.CC900C93F3C48179.idx new file mode 100755 index 0000000000000000000000000000000000000000..67b088cdcfaf1603f98e5480324176fd1cd857cc Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add_impl.hpp.CC900C93F3C48179.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp.F57E6277DAC86E29.idx b/.cache/clangd/index/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp.F57E6277DAC86E29.idx new file mode 100755 index 0000000000000000000000000000000000000000..f862c54a7625316bf76eb6ddb95100f717ae4a95 Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp.F57E6277DAC86E29.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_gemm.cpp.B214B5448CC6B6C2.idx b/.cache/clangd/index/profile_batched_gemm_gemm.cpp.B214B5448CC6B6C2.idx new file mode 100755 index 0000000000000000000000000000000000000000..4fdbd0130a479eeeebd342669993bbd3018e7c1e Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_gemm.cpp.B214B5448CC6B6C2.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_gemm_impl.hpp.95E513E815A63274.idx b/.cache/clangd/index/profile_batched_gemm_gemm_impl.hpp.95E513E815A63274.idx new file mode 100755 index 0000000000000000000000000000000000000000..c96faf96d1779f8026e86991d5d9865a9a27c50c Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_gemm_impl.hpp.95E513E815A63274.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_impl.hpp.65F140203E93B240.idx b/.cache/clangd/index/profile_batched_gemm_impl.hpp.65F140203E93B240.idx new file mode 100755 index 0000000000000000000000000000000000000000..c68960bb70195d33b61a87470fad555aefd3cae4 Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_impl.hpp.65F140203E93B240.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_reduce.cpp.EEA3A1DAF90E2787.idx b/.cache/clangd/index/profile_batched_gemm_reduce.cpp.EEA3A1DAF90E2787.idx new file mode 100755 index 0000000000000000000000000000000000000000..0e0d9391a389a418677006400b314d189931c35a Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_reduce.cpp.EEA3A1DAF90E2787.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_reduce_impl.hpp.5DA2586197AB1178.idx b/.cache/clangd/index/profile_batched_gemm_reduce_impl.hpp.5DA2586197AB1178.idx new file mode 100755 index 0000000000000000000000000000000000000000..e2b1fc5ba39c3cbe963c366d2b0e2b99a35d08cf Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_reduce_impl.hpp.5DA2586197AB1178.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_softmax_gemm_impl.hpp.03AF7B0A8D358951.idx b/.cache/clangd/index/profile_batched_gemm_softmax_gemm_impl.hpp.03AF7B0A8D358951.idx new file mode 100755 index 0000000000000000000000000000000000000000..103a6d4bc5fe8137efcf14901291f7858940b8db Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_softmax_gemm_impl.hpp.03AF7B0A8D358951.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_softmax_gemm_permute_impl.hpp.14377DE4299D9A14.idx b/.cache/clangd/index/profile_batched_gemm_softmax_gemm_permute_impl.hpp.14377DE4299D9A14.idx new file mode 100755 index 0000000000000000000000000000000000000000..ec8d4039ebf6e9384a25813a7689f9999d765b8d Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_softmax_gemm_permute_impl.hpp.14377DE4299D9A14.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_backward_impl.hpp.30164435038F0711.idx b/.cache/clangd/index/profile_batchnorm_backward_impl.hpp.30164435038F0711.idx new file mode 100755 index 0000000000000000000000000000000000000000..6bb0850861d7726b9044b7b2dea2230e1836bb39 Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_backward_impl.hpp.30164435038F0711.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_bwd.cpp.8CF98960A7C44AE9.idx b/.cache/clangd/index/profile_batchnorm_bwd.cpp.8CF98960A7C44AE9.idx new file mode 100755 index 0000000000000000000000000000000000000000..c8d56c32530eedcec7d580d94239fb0158122e34 Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_bwd.cpp.8CF98960A7C44AE9.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_forward_impl.hpp.07F6CF6EEC2BDC7E.idx b/.cache/clangd/index/profile_batchnorm_forward_impl.hpp.07F6CF6EEC2BDC7E.idx new file mode 100755 index 0000000000000000000000000000000000000000..bc7f958dd09199263efd5a903f025f644e304da8 Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_forward_impl.hpp.07F6CF6EEC2BDC7E.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_fwd.cpp.493E9D388D70238E.idx b/.cache/clangd/index/profile_batchnorm_fwd.cpp.493E9D388D70238E.idx new file mode 100755 index 0000000000000000000000000000000000000000..f79468237d5ae4b13d0ccddbdbb3d5c4719f2ed4 Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_fwd.cpp.493E9D388D70238E.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_infer.cpp.69EB815355931680.idx b/.cache/clangd/index/profile_batchnorm_infer.cpp.69EB815355931680.idx new file mode 100755 index 0000000000000000000000000000000000000000..4952169b11c7078db07d0aeed7ca59437e736f1f Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_infer.cpp.69EB815355931680.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_infer_impl.hpp.B4DE6B04DAFD57EF.idx b/.cache/clangd/index/profile_batchnorm_infer_impl.hpp.B4DE6B04DAFD57EF.idx new file mode 100755 index 0000000000000000000000000000000000000000..fe9e7cf1961d7f35c162b867f82cf5b85e1ef4e3 Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_infer_impl.hpp.B4DE6B04DAFD57EF.idx differ diff --git a/.cache/clangd/index/profile_contraction_bilinear.cpp.B7E7F8EC02159FA8.idx b/.cache/clangd/index/profile_contraction_bilinear.cpp.B7E7F8EC02159FA8.idx new file mode 100755 index 0000000000000000000000000000000000000000..a260245b162a8ff0f78a90a2ff3041791bd32fff Binary files /dev/null and b/.cache/clangd/index/profile_contraction_bilinear.cpp.B7E7F8EC02159FA8.idx differ diff --git a/.cache/clangd/index/profile_contraction_impl.hpp.E5FF1E81EBC1E7D5.idx b/.cache/clangd/index/profile_contraction_impl.hpp.E5FF1E81EBC1E7D5.idx new file mode 100755 index 0000000000000000000000000000000000000000..b96f441f009839f6caf144ff272d1d2cee041288 Binary files /dev/null and b/.cache/clangd/index/profile_contraction_impl.hpp.E5FF1E81EBC1E7D5.idx differ diff --git a/.cache/clangd/index/profile_contraction_scale.cpp.C10C8FEB4E77E97A.idx b/.cache/clangd/index/profile_contraction_scale.cpp.C10C8FEB4E77E97A.idx new file mode 100755 index 0000000000000000000000000000000000000000..9b77d58a8790c2f219b82d0eaac4e6ec9087d52a Binary files /dev/null and b/.cache/clangd/index/profile_contraction_scale.cpp.C10C8FEB4E77E97A.idx differ diff --git a/.cache/clangd/index/profile_contraction_utils.hpp.E0F0B9314CF06E36.idx b/.cache/clangd/index/profile_contraction_utils.hpp.E0F0B9314CF06E36.idx new file mode 100755 index 0000000000000000000000000000000000000000..ac4ed498cac427f215ce4618589b17d26772334c Binary files /dev/null and b/.cache/clangd/index/profile_contraction_utils.hpp.E0F0B9314CF06E36.idx differ diff --git a/.cache/clangd/index/profile_conv_bwd_data.cpp.E30D0E66DFF68BCC.idx b/.cache/clangd/index/profile_conv_bwd_data.cpp.E30D0E66DFF68BCC.idx new file mode 100755 index 0000000000000000000000000000000000000000..52dc185d2bd7aaeff0e75db49d76ffbf17a11847 Binary files /dev/null and b/.cache/clangd/index/profile_conv_bwd_data.cpp.E30D0E66DFF68BCC.idx differ diff --git a/.cache/clangd/index/profile_conv_bwd_data_impl.hpp.0CBAC32BD7F22884.idx b/.cache/clangd/index/profile_conv_bwd_data_impl.hpp.0CBAC32BD7F22884.idx new file mode 100755 index 0000000000000000000000000000000000000000..f9ee2014df034106d54c3e7353bdb663be4b5543 Binary files /dev/null and b/.cache/clangd/index/profile_conv_bwd_data_impl.hpp.0CBAC32BD7F22884.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd.cpp.7612DDA826C4C111.idx b/.cache/clangd/index/profile_conv_fwd.cpp.7612DDA826C4C111.idx new file mode 100755 index 0000000000000000000000000000000000000000..a530ec2b19c849b70f03dc19408645c320209170 Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd.cpp.7612DDA826C4C111.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd_bias_relu.cpp.2AED2F5E417190C4.idx b/.cache/clangd/index/profile_conv_fwd_bias_relu.cpp.2AED2F5E417190C4.idx new file mode 100755 index 0000000000000000000000000000000000000000..2a3dc843dddf5669958a70712143b0b02aff3727 Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd_bias_relu.cpp.2AED2F5E417190C4.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd_bias_relu_add.cpp.064478D11C1695BD.idx b/.cache/clangd/index/profile_conv_fwd_bias_relu_add.cpp.064478D11C1695BD.idx new file mode 100755 index 0000000000000000000000000000000000000000..870b378d50917218e4942a5854882be6d7fb618c Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd_bias_relu_add.cpp.064478D11C1695BD.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd_bias_relu_add_impl.hpp.660B8209627A1FBE.idx b/.cache/clangd/index/profile_conv_fwd_bias_relu_add_impl.hpp.660B8209627A1FBE.idx new file mode 100755 index 0000000000000000000000000000000000000000..3a6079734b51399bc900437f3d54b50e93a48aac Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd_bias_relu_add_impl.hpp.660B8209627A1FBE.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd_bias_relu_impl.hpp.0A6D3A36AE3BBBB6.idx b/.cache/clangd/index/profile_conv_fwd_bias_relu_impl.hpp.0A6D3A36AE3BBBB6.idx new file mode 100755 index 0000000000000000000000000000000000000000..257cfa256ef06354f91b0a41c26b5d9437dc0863 Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd_bias_relu_impl.hpp.0A6D3A36AE3BBBB6.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd_impl.hpp.DB77593E3FC3FC2B.idx b/.cache/clangd/index/profile_conv_fwd_impl.hpp.DB77593E3FC3FC2B.idx new file mode 100755 index 0000000000000000000000000000000000000000..737b2c49296f45919647bbd1ff37f164c7e49cb6 Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd_impl.hpp.DB77593E3FC3FC2B.idx differ diff --git a/.cache/clangd/index/profile_conv_tensor_rearrange.cpp.50518BDD3D0D27F8.idx b/.cache/clangd/index/profile_conv_tensor_rearrange.cpp.50518BDD3D0D27F8.idx new file mode 100755 index 0000000000000000000000000000000000000000..e1a3dbb47373cf25d8244efd7e15f6aef019eb33 Binary files /dev/null and b/.cache/clangd/index/profile_conv_tensor_rearrange.cpp.50518BDD3D0D27F8.idx differ diff --git a/.cache/clangd/index/profile_conv_tensor_rearrange_impl.hpp.ECD3F7A8AA337DB3.idx b/.cache/clangd/index/profile_conv_tensor_rearrange_impl.hpp.ECD3F7A8AA337DB3.idx new file mode 100755 index 0000000000000000000000000000000000000000..b910d870981f257576f6fff90f9e48d81e957878 Binary files /dev/null and b/.cache/clangd/index/profile_conv_tensor_rearrange_impl.hpp.ECD3F7A8AA337DB3.idx differ diff --git a/.cache/clangd/index/profile_elementwise_layernorm_impl.hpp.228FBBB77B1C2212.idx b/.cache/clangd/index/profile_elementwise_layernorm_impl.hpp.228FBBB77B1C2212.idx new file mode 100755 index 0000000000000000000000000000000000000000..e624bdc5bf23a8cd94dad739fa830fe5e659e71c Binary files /dev/null and b/.cache/clangd/index/profile_elementwise_layernorm_impl.hpp.228FBBB77B1C2212.idx differ diff --git a/.cache/clangd/index/profile_gemm.cpp.1660D1BAA0C70B61.idx b/.cache/clangd/index/profile_gemm.cpp.1660D1BAA0C70B61.idx new file mode 100755 index 0000000000000000000000000000000000000000..ed68824fd5eb8ef4eaf78fa56158b51218670294 Binary files /dev/null and b/.cache/clangd/index/profile_gemm.cpp.1660D1BAA0C70B61.idx differ diff --git a/.cache/clangd/index/profile_gemm_add.cpp.B4A511D84B31E476.idx b/.cache/clangd/index/profile_gemm_add.cpp.B4A511D84B31E476.idx new file mode 100755 index 0000000000000000000000000000000000000000..f6acbea7247d8dde54a22628700d4ce97f589184 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add.cpp.B4A511D84B31E476.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_add_fastgelu.cpp.374CFB86EAD905D5.idx b/.cache/clangd/index/profile_gemm_add_add_fastgelu.cpp.374CFB86EAD905D5.idx new file mode 100755 index 0000000000000000000000000000000000000000..8b5d2ff4ac2b068b07904776f8499f68a0cbedde Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_add_fastgelu.cpp.374CFB86EAD905D5.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_add_fastgelu_impl.hpp.7165E9E08D31883A.idx b/.cache/clangd/index/profile_gemm_add_add_fastgelu_impl.hpp.7165E9E08D31883A.idx new file mode 100755 index 0000000000000000000000000000000000000000..db1d7d541f80a063ad9b781245ca2f9b6966dd9e Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_add_fastgelu_impl.hpp.7165E9E08D31883A.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_fastgelu.cpp.CA0713C21D4F7C04.idx b/.cache/clangd/index/profile_gemm_add_fastgelu.cpp.CA0713C21D4F7C04.idx new file mode 100755 index 0000000000000000000000000000000000000000..05b280636d214da61ce8ff8d91f1f1add73fefdb Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_fastgelu.cpp.CA0713C21D4F7C04.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_fastgelu_impl.hpp.23CA5590877F722E.idx b/.cache/clangd/index/profile_gemm_add_fastgelu_impl.hpp.23CA5590877F722E.idx new file mode 100755 index 0000000000000000000000000000000000000000..d8715452f96170f66744f7dc2145296e047bb5dc Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_fastgelu_impl.hpp.23CA5590877F722E.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_impl.hpp.A44323833CE8C5AD.idx b/.cache/clangd/index/profile_gemm_add_impl.hpp.A44323833CE8C5AD.idx new file mode 100755 index 0000000000000000000000000000000000000000..e1c62613961c97e813d2b0244d8ac68f58a86d47 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_impl.hpp.A44323833CE8C5AD.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_multiply.cpp.136C377C7D5B0E54.idx b/.cache/clangd/index/profile_gemm_add_multiply.cpp.136C377C7D5B0E54.idx new file mode 100755 index 0000000000000000000000000000000000000000..23c1724f1e8ef30ff5d1e7ebcce9f0934a3eeacb Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_multiply.cpp.136C377C7D5B0E54.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_multiply_impl.hpp.FBBEFEDC4A78D751.idx b/.cache/clangd/index/profile_gemm_add_multiply_impl.hpp.FBBEFEDC4A78D751.idx new file mode 100755 index 0000000000000000000000000000000000000000..7f3d50595e5f6c4596b31cd6aba5c320aa9d9237 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_multiply_impl.hpp.FBBEFEDC4A78D751.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_relu.cpp.7B9D4CAF29075E5F.idx b/.cache/clangd/index/profile_gemm_add_relu.cpp.7B9D4CAF29075E5F.idx new file mode 100755 index 0000000000000000000000000000000000000000..3ee8f4d173f6faaad440d6c7070882da99f0cde8 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_relu.cpp.7B9D4CAF29075E5F.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_relu_add_layernorm.cpp.E30A7F678D4C6FAC.idx b/.cache/clangd/index/profile_gemm_add_relu_add_layernorm.cpp.E30A7F678D4C6FAC.idx new file mode 100755 index 0000000000000000000000000000000000000000..1fdd81d44c80bb46ef379a6c65a212c433035d37 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_relu_add_layernorm.cpp.E30A7F678D4C6FAC.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_relu_add_layernorm_impl.hpp.5DE61D830E8EC4CC.idx b/.cache/clangd/index/profile_gemm_add_relu_add_layernorm_impl.hpp.5DE61D830E8EC4CC.idx new file mode 100755 index 0000000000000000000000000000000000000000..5af399f968c3d35c70644b9709463cc2a3ad5865 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_relu_add_layernorm_impl.hpp.5DE61D830E8EC4CC.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_relu_impl.hpp.9153E795377986F0.idx b/.cache/clangd/index/profile_gemm_add_relu_impl.hpp.9153E795377986F0.idx new file mode 100755 index 0000000000000000000000000000000000000000..a6b73a4747c619657cc6e274ab1ef0b9043d4cc6 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_relu_impl.hpp.9153E795377986F0.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_silu.cpp.67ABB3D17F5C92B4.idx b/.cache/clangd/index/profile_gemm_add_silu.cpp.67ABB3D17F5C92B4.idx new file mode 100755 index 0000000000000000000000000000000000000000..1a064c87711d1b575e0eb9752155e5c21a716e63 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_silu.cpp.67ABB3D17F5C92B4.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_silu_impl.hpp.CC18A7C19DDCC841.idx b/.cache/clangd/index/profile_gemm_add_silu_impl.hpp.CC18A7C19DDCC841.idx new file mode 100755 index 0000000000000000000000000000000000000000..9db8161a1f7d82d9b01167b94ad2bae42aa7656e Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_silu_impl.hpp.CC18A7C19DDCC841.idx differ diff --git a/.cache/clangd/index/profile_gemm_bias_add_reduce.cpp.CF67B6B3D02F20A5.idx b/.cache/clangd/index/profile_gemm_bias_add_reduce.cpp.CF67B6B3D02F20A5.idx new file mode 100755 index 0000000000000000000000000000000000000000..07ee4db7a270e9d62a50eec4447a37d8ade1fbaa Binary files /dev/null and b/.cache/clangd/index/profile_gemm_bias_add_reduce.cpp.CF67B6B3D02F20A5.idx differ diff --git a/.cache/clangd/index/profile_gemm_bias_add_reduce_impl.hpp.CC4023F2075FFC72.idx b/.cache/clangd/index/profile_gemm_bias_add_reduce_impl.hpp.CC4023F2075FFC72.idx new file mode 100755 index 0000000000000000000000000000000000000000..7d54811a97007c9d5196f7d8b81d221dc0ea1dd4 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_bias_add_reduce_impl.hpp.CC4023F2075FFC72.idx differ diff --git a/.cache/clangd/index/profile_gemm_bilinear.cpp.2A04608A0019808D.idx b/.cache/clangd/index/profile_gemm_bilinear.cpp.2A04608A0019808D.idx new file mode 100755 index 0000000000000000000000000000000000000000..fb5a07c889155626cd50132152c0eeaf0bec67bd Binary files /dev/null and b/.cache/clangd/index/profile_gemm_bilinear.cpp.2A04608A0019808D.idx differ diff --git a/.cache/clangd/index/profile_gemm_bilinear_impl.hpp.3D87C8E5D761FA48.idx b/.cache/clangd/index/profile_gemm_bilinear_impl.hpp.3D87C8E5D761FA48.idx new file mode 100755 index 0000000000000000000000000000000000000000..1a7f7f3867e4ebacbd0eff534607f1ae1628867a Binary files /dev/null and b/.cache/clangd/index/profile_gemm_bilinear_impl.hpp.3D87C8E5D761FA48.idx differ diff --git a/.cache/clangd/index/profile_gemm_fastgelu.cpp.2EC621FE126AC880.idx b/.cache/clangd/index/profile_gemm_fastgelu.cpp.2EC621FE126AC880.idx new file mode 100755 index 0000000000000000000000000000000000000000..9e8b5dfc8d626893561ad533a279c5634cf72e7c Binary files /dev/null and b/.cache/clangd/index/profile_gemm_fastgelu.cpp.2EC621FE126AC880.idx differ diff --git a/.cache/clangd/index/profile_gemm_fastgelu_impl.hpp.090E8EEC37D7278D.idx b/.cache/clangd/index/profile_gemm_fastgelu_impl.hpp.090E8EEC37D7278D.idx new file mode 100755 index 0000000000000000000000000000000000000000..471324fd2b781d53436cff2c7a8dc90ee0ba0bd1 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_fastgelu_impl.hpp.090E8EEC37D7278D.idx differ diff --git a/.cache/clangd/index/profile_gemm_impl.hpp.DB85F2B55BD1CC04.idx b/.cache/clangd/index/profile_gemm_impl.hpp.DB85F2B55BD1CC04.idx new file mode 100755 index 0000000000000000000000000000000000000000..a1e8f5bd48d3b16f452495da9f434f0a920ecae8 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_impl.hpp.DB85F2B55BD1CC04.idx differ diff --git a/.cache/clangd/index/profile_gemm_multiply_add.cpp.0030E28460A11C1F.idx b/.cache/clangd/index/profile_gemm_multiply_add.cpp.0030E28460A11C1F.idx new file mode 100755 index 0000000000000000000000000000000000000000..846158aba40b783d13f87dd2b6de595f20b90a66 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_multiply_add.cpp.0030E28460A11C1F.idx differ diff --git a/.cache/clangd/index/profile_gemm_multiply_add_impl.hpp.A03C0417F818F4FD.idx b/.cache/clangd/index/profile_gemm_multiply_add_impl.hpp.A03C0417F818F4FD.idx new file mode 100755 index 0000000000000000000000000000000000000000..98ef2a1f0c7645ea93240e849ae969e448cd5b06 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_multiply_add_impl.hpp.A03C0417F818F4FD.idx differ diff --git a/.cache/clangd/index/profile_gemm_reduce.cpp.0A54AE6B66160321.idx b/.cache/clangd/index/profile_gemm_reduce.cpp.0A54AE6B66160321.idx new file mode 100755 index 0000000000000000000000000000000000000000..0ff1b9016ea90562783702588a5aad2fa51243d3 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_reduce.cpp.0A54AE6B66160321.idx differ diff --git a/.cache/clangd/index/profile_gemm_reduce_impl.hpp.FE75615CA769411C.idx b/.cache/clangd/index/profile_gemm_reduce_impl.hpp.FE75615CA769411C.idx new file mode 100755 index 0000000000000000000000000000000000000000..82c975a3f571ebd814bd949deac6070b27314774 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_reduce_impl.hpp.FE75615CA769411C.idx differ diff --git a/.cache/clangd/index/profile_gemm_splitk.cpp.6BF00CBB81DE4660.idx b/.cache/clangd/index/profile_gemm_splitk.cpp.6BF00CBB81DE4660.idx new file mode 100755 index 0000000000000000000000000000000000000000..b4f951d643c468b70ed509c9f201e33e9bcdfda7 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_splitk.cpp.6BF00CBB81DE4660.idx differ diff --git a/.cache/clangd/index/profile_gemm_splitk_impl.hpp.1DC09EFBBC454E8F.idx b/.cache/clangd/index/profile_gemm_splitk_impl.hpp.1DC09EFBBC454E8F.idx new file mode 100755 index 0000000000000000000000000000000000000000..8b7c3856117c489fb79a5f080bc7ef8f932adadd Binary files /dev/null and b/.cache/clangd/index/profile_gemm_splitk_impl.hpp.1DC09EFBBC454E8F.idx differ diff --git a/.cache/clangd/index/profile_gemm_streamk.cpp.46F4E02E0912E672.idx b/.cache/clangd/index/profile_gemm_streamk.cpp.46F4E02E0912E672.idx new file mode 100755 index 0000000000000000000000000000000000000000..3931f5448c186afc8cd28551cf4fa793664cb2e9 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_streamk.cpp.46F4E02E0912E672.idx differ diff --git a/.cache/clangd/index/profile_gemm_streamk_impl.hpp.73FA01C66CC8D3E3.idx b/.cache/clangd/index/profile_gemm_streamk_impl.hpp.73FA01C66CC8D3E3.idx new file mode 100755 index 0000000000000000000000000000000000000000..bd495584dffe698603fa3b1db574f18f0a75754d Binary files /dev/null and b/.cache/clangd/index/profile_gemm_streamk_impl.hpp.73FA01C66CC8D3E3.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_bwd_data.cpp.FA67D9F6149A96BE.idx b/.cache/clangd/index/profile_grouped_conv_bwd_data.cpp.FA67D9F6149A96BE.idx new file mode 100755 index 0000000000000000000000000000000000000000..7768d9861d76029eba15d1c2abc80dff7498a880 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_bwd_data.cpp.FA67D9F6149A96BE.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_bwd_data_impl.hpp.59E9CBB730B20A37.idx b/.cache/clangd/index/profile_grouped_conv_bwd_data_impl.hpp.59E9CBB730B20A37.idx new file mode 100755 index 0000000000000000000000000000000000000000..399286bf2d2515dbe947f47db49ba8e2e6028fff Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_bwd_data_impl.hpp.59E9CBB730B20A37.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_bwd_weight.cpp.34186A38546F70D1.idx b/.cache/clangd/index/profile_grouped_conv_bwd_weight.cpp.34186A38546F70D1.idx new file mode 100755 index 0000000000000000000000000000000000000000..3dd933d953390016fe6a1828c94ecff674b02dcb Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_bwd_weight.cpp.34186A38546F70D1.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_bwd_weight_impl.hpp.A8CF2351C95F34F5.idx b/.cache/clangd/index/profile_grouped_conv_bwd_weight_impl.hpp.A8CF2351C95F34F5.idx new file mode 100755 index 0000000000000000000000000000000000000000..6856e1d7921f2646ee7d4ce26f69b61054a7df68 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_bwd_weight_impl.hpp.A8CF2351C95F34F5.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_fwd.cpp.8D8BCE0560F621C4.idx b/.cache/clangd/index/profile_grouped_conv_fwd.cpp.8D8BCE0560F621C4.idx new file mode 100755 index 0000000000000000000000000000000000000000..706e95ee710a3a7e8892305fd115f9b300a908b8 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_fwd.cpp.8D8BCE0560F621C4.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_fwd_impl.hpp.90847DB78EF4B739.idx b/.cache/clangd/index/profile_grouped_conv_fwd_impl.hpp.90847DB78EF4B739.idx new file mode 100755 index 0000000000000000000000000000000000000000..27e3e0c6479716d36fae28d1e53ca57b7e103532 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_fwd_impl.hpp.90847DB78EF4B739.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm.cpp.7EE7588374346C13.idx b/.cache/clangd/index/profile_grouped_gemm.cpp.7EE7588374346C13.idx new file mode 100755 index 0000000000000000000000000000000000000000..65fb0e7c5f4b6fd177b749acfb2afbe3a88eadd0 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm.cpp.7EE7588374346C13.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm_fastgelu.cpp.AC58B27586C8FEAD.idx b/.cache/clangd/index/profile_grouped_gemm_fastgelu.cpp.AC58B27586C8FEAD.idx new file mode 100755 index 0000000000000000000000000000000000000000..5b3e8c346c25a29ca8178f3facec52ea389431cb Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm_fastgelu.cpp.AC58B27586C8FEAD.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm_fastgelu_impl.hpp.DE8318589739E396.idx b/.cache/clangd/index/profile_grouped_gemm_fastgelu_impl.hpp.DE8318589739E396.idx new file mode 100755 index 0000000000000000000000000000000000000000..10c9f63e4649da63f66d6023d4b1b228d357ebf2 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm_fastgelu_impl.hpp.DE8318589739E396.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm_fixed_nk.cpp.12EAC200C0F64A96.idx b/.cache/clangd/index/profile_grouped_gemm_fixed_nk.cpp.12EAC200C0F64A96.idx new file mode 100755 index 0000000000000000000000000000000000000000..3a492754b7733f2843f3b5bd684d763b00085ac5 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm_fixed_nk.cpp.12EAC200C0F64A96.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm_fixed_nk_impl.hpp.B583CC73B5188079.idx b/.cache/clangd/index/profile_grouped_gemm_fixed_nk_impl.hpp.B583CC73B5188079.idx new file mode 100755 index 0000000000000000000000000000000000000000..737982474d2eb5496f8f4481a4c176dc0bc7df16 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm_fixed_nk_impl.hpp.B583CC73B5188079.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm_impl.hpp.0459F4415131A8AB.idx b/.cache/clangd/index/profile_grouped_gemm_impl.hpp.0459F4415131A8AB.idx new file mode 100755 index 0000000000000000000000000000000000000000..d4c9fd9671d7cc78c13064b9be6d22c5ddf3ceda Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm_impl.hpp.0459F4415131A8AB.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_bwd_data.cpp.B5C8D8F20885019C.idx b/.cache/clangd/index/profile_groupnorm_bwd_data.cpp.B5C8D8F20885019C.idx new file mode 100755 index 0000000000000000000000000000000000000000..eee5a691cb4694732bf484f6cdefd037f60bee1c Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_bwd_data.cpp.B5C8D8F20885019C.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_bwd_data_impl.hpp.05BE2AB219AD2046.idx b/.cache/clangd/index/profile_groupnorm_bwd_data_impl.hpp.05BE2AB219AD2046.idx new file mode 100755 index 0000000000000000000000000000000000000000..3ebb81cb4cab068cef52ba82f0133656c85c7d44 Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_bwd_data_impl.hpp.05BE2AB219AD2046.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta.cpp.0D9708552C2EDA92.idx b/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta.cpp.0D9708552C2EDA92.idx new file mode 100755 index 0000000000000000000000000000000000000000..5080ed9b10dbee571cb064f7cd66364147b78a09 Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta.cpp.0D9708552C2EDA92.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta_impl.hpp.F912A6BA3766E9B6.idx b/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta_impl.hpp.F912A6BA3766E9B6.idx new file mode 100755 index 0000000000000000000000000000000000000000..2ff61fc6bca588581c110744d25dcec5a4bb6a7a Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta_impl.hpp.F912A6BA3766E9B6.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_fwd.cpp.B71B17B33AF365C7.idx b/.cache/clangd/index/profile_groupnorm_fwd.cpp.B71B17B33AF365C7.idx new file mode 100755 index 0000000000000000000000000000000000000000..7156c186bd99a7093a7422be634c837f0bac2954 Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_fwd.cpp.B71B17B33AF365C7.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_fwd_impl.hpp.DD036B9EC45A3789.idx b/.cache/clangd/index/profile_groupnorm_fwd_impl.hpp.DD036B9EC45A3789.idx new file mode 100755 index 0000000000000000000000000000000000000000..275bb734c77eb9a7598e10452cb506e7d11744e8 Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_fwd_impl.hpp.DD036B9EC45A3789.idx differ diff --git a/.cache/clangd/index/profile_layernorm_bwd_data.cpp.EDB022306AF7F904.idx b/.cache/clangd/index/profile_layernorm_bwd_data.cpp.EDB022306AF7F904.idx new file mode 100755 index 0000000000000000000000000000000000000000..5eeac590a67f4bc6e2f30b4751f3091b65a536a7 Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_bwd_data.cpp.EDB022306AF7F904.idx differ diff --git a/.cache/clangd/index/profile_layernorm_bwd_data_impl.hpp.CF00FB7A122475B2.idx b/.cache/clangd/index/profile_layernorm_bwd_data_impl.hpp.CF00FB7A122475B2.idx new file mode 100755 index 0000000000000000000000000000000000000000..95428076a81660494669c6a28b2f0af6b69fa78b Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_bwd_data_impl.hpp.CF00FB7A122475B2.idx differ diff --git a/.cache/clangd/index/profile_layernorm_bwd_gamma_beta.cpp.58ACE0ADA4571F63.idx b/.cache/clangd/index/profile_layernorm_bwd_gamma_beta.cpp.58ACE0ADA4571F63.idx new file mode 100755 index 0000000000000000000000000000000000000000..c4990ca5cca4385d525cf150ad98d2216c93b3d3 Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_bwd_gamma_beta.cpp.58ACE0ADA4571F63.idx differ diff --git a/.cache/clangd/index/profile_layernorm_bwd_gamma_beta_impl.hpp.672AE63374836A4E.idx b/.cache/clangd/index/profile_layernorm_bwd_gamma_beta_impl.hpp.672AE63374836A4E.idx new file mode 100755 index 0000000000000000000000000000000000000000..4bffceb08e7a69ea571c901e34340947a6e1fb4f Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_bwd_gamma_beta_impl.hpp.672AE63374836A4E.idx differ diff --git a/.cache/clangd/index/profile_layernorm_fwd.cpp.FA9A3EA5A9ABAF4C.idx b/.cache/clangd/index/profile_layernorm_fwd.cpp.FA9A3EA5A9ABAF4C.idx new file mode 100755 index 0000000000000000000000000000000000000000..007049bc5011bd927a34d326fc7425d5eb781bcc Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_fwd.cpp.FA9A3EA5A9ABAF4C.idx differ diff --git a/.cache/clangd/index/profile_layernorm_fwd_impl.hpp.BF1975AA887FB0A6.idx b/.cache/clangd/index/profile_layernorm_fwd_impl.hpp.BF1975AA887FB0A6.idx new file mode 100755 index 0000000000000000000000000000000000000000..71db8cc202906babc8ca4b2206c7ee07cdcec9ea Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_fwd_impl.hpp.BF1975AA887FB0A6.idx differ diff --git a/.cache/clangd/index/profile_max_pool3d_bwd.cpp.7793240AA812E755.idx b/.cache/clangd/index/profile_max_pool3d_bwd.cpp.7793240AA812E755.idx new file mode 100755 index 0000000000000000000000000000000000000000..2c523db25726c832a5f9d52d4bfa2d83199e6c2f Binary files /dev/null and b/.cache/clangd/index/profile_max_pool3d_bwd.cpp.7793240AA812E755.idx differ diff --git a/.cache/clangd/index/profile_max_pool3d_bwd_impl.hpp.885D2D1066A38A2E.idx b/.cache/clangd/index/profile_max_pool3d_bwd_impl.hpp.885D2D1066A38A2E.idx new file mode 100755 index 0000000000000000000000000000000000000000..2a63bf051e333d069d5e110a72a941322e03024f Binary files /dev/null and b/.cache/clangd/index/profile_max_pool3d_bwd_impl.hpp.885D2D1066A38A2E.idx differ diff --git a/.cache/clangd/index/profile_max_pool3d_fwd.cpp.11655872CD06FCBE.idx b/.cache/clangd/index/profile_max_pool3d_fwd.cpp.11655872CD06FCBE.idx new file mode 100755 index 0000000000000000000000000000000000000000..3ac6f2a5777303c4319e2f1abae0b8b545f6f06f Binary files /dev/null and b/.cache/clangd/index/profile_max_pool3d_fwd.cpp.11655872CD06FCBE.idx differ diff --git a/.cache/clangd/index/profile_permute_scale.cpp.A23434A32C62DC3B.idx b/.cache/clangd/index/profile_permute_scale.cpp.A23434A32C62DC3B.idx new file mode 100755 index 0000000000000000000000000000000000000000..73d2b9eae79a65d1d6fda4ff42809cfb913f01ad Binary files /dev/null and b/.cache/clangd/index/profile_permute_scale.cpp.A23434A32C62DC3B.idx differ diff --git a/.cache/clangd/index/profile_permute_scale_impl.hpp.9398D48C7353608C.idx b/.cache/clangd/index/profile_permute_scale_impl.hpp.9398D48C7353608C.idx new file mode 100755 index 0000000000000000000000000000000000000000..2baeed23f6ff4320b52006d4abcc21a532a4c415 Binary files /dev/null and b/.cache/clangd/index/profile_permute_scale_impl.hpp.9398D48C7353608C.idx differ diff --git a/.cache/clangd/index/profile_pool3d_fwd_impl.hpp.4E7051C23FCACC74.idx b/.cache/clangd/index/profile_pool3d_fwd_impl.hpp.4E7051C23FCACC74.idx new file mode 100755 index 0000000000000000000000000000000000000000..1870c6cd48dfa8e5614d61d72982306fa7589031 Binary files /dev/null and b/.cache/clangd/index/profile_pool3d_fwd_impl.hpp.4E7051C23FCACC74.idx differ diff --git a/.cache/clangd/index/profile_reduce.cpp.7CFAAB4D3A545696.idx b/.cache/clangd/index/profile_reduce.cpp.7CFAAB4D3A545696.idx new file mode 100755 index 0000000000000000000000000000000000000000..c961d17fabb9bd3c39aa50eff0a7c5a3bd1ca2ef Binary files /dev/null and b/.cache/clangd/index/profile_reduce.cpp.7CFAAB4D3A545696.idx differ diff --git a/.cache/clangd/index/profile_reduce_impl.hpp.17B63EE6A3A6D55C.idx b/.cache/clangd/index/profile_reduce_impl.hpp.17B63EE6A3A6D55C.idx new file mode 100755 index 0000000000000000000000000000000000000000..494b10d5b6efbc1255013a7639bf67d160543a9c Binary files /dev/null and b/.cache/clangd/index/profile_reduce_impl.hpp.17B63EE6A3A6D55C.idx differ diff --git a/.cache/clangd/index/profile_softmax.cpp.31A9AF77C4279825.idx b/.cache/clangd/index/profile_softmax.cpp.31A9AF77C4279825.idx new file mode 100755 index 0000000000000000000000000000000000000000..34318f6439f93a9b6ea06f0e5e63e1b533284eca Binary files /dev/null and b/.cache/clangd/index/profile_softmax.cpp.31A9AF77C4279825.idx differ diff --git a/.cache/clangd/index/profile_softmax_impl.hpp.28DC8A49A2D3559F.idx b/.cache/clangd/index/profile_softmax_impl.hpp.28DC8A49A2D3559F.idx new file mode 100755 index 0000000000000000000000000000000000000000..8e742d8fc3bdfd8d18e5451e8929681917cf1452 Binary files /dev/null and b/.cache/clangd/index/profile_softmax_impl.hpp.28DC8A49A2D3559F.idx differ diff --git a/.cache/clangd/index/profile_transpose.cpp.3798E55CF7D973A7.idx b/.cache/clangd/index/profile_transpose.cpp.3798E55CF7D973A7.idx new file mode 100755 index 0000000000000000000000000000000000000000..4e3cd18a8be8e9bb40ce5514b75c04cd86fae817 Binary files /dev/null and b/.cache/clangd/index/profile_transpose.cpp.3798E55CF7D973A7.idx differ diff --git a/.cache/clangd/index/profile_transpose_impl.hpp.8CE368E3C650AFE1.idx b/.cache/clangd/index/profile_transpose_impl.hpp.8CE368E3C650AFE1.idx new file mode 100755 index 0000000000000000000000000000000000000000..556406af5861a91cda4f99dd7a5f83ff4dfd2986 Binary files /dev/null and b/.cache/clangd/index/profile_transpose_impl.hpp.8CE368E3C650AFE1.idx differ diff --git a/.cache/clangd/index/profiler.cpp.A1998E185346D702.idx b/.cache/clangd/index/profiler.cpp.A1998E185346D702.idx new file mode 100755 index 0000000000000000000000000000000000000000..735e09e6661c021045373137e31e53da6ffcbb52 Binary files /dev/null and b/.cache/clangd/index/profiler.cpp.A1998E185346D702.idx differ diff --git a/.cache/clangd/index/profiler_operation_registry.hpp.C3738D3851333A5F.idx b/.cache/clangd/index/profiler_operation_registry.hpp.C3738D3851333A5F.idx new file mode 100755 index 0000000000000000000000000000000000000000..e2a512c4c458f44a3e3ea1be0c6108fc63af2cee Binary files /dev/null and b/.cache/clangd/index/profiler_operation_registry.hpp.C3738D3851333A5F.idx differ diff --git a/.cache/clangd/index/put_element_fp16.cpp.4CB3E91BA1C3425F.idx b/.cache/clangd/index/put_element_fp16.cpp.4CB3E91BA1C3425F.idx new file mode 100755 index 0000000000000000000000000000000000000000..46d3175f8ab203cb671dd8c56b5232635725da2f Binary files /dev/null and b/.cache/clangd/index/put_element_fp16.cpp.4CB3E91BA1C3425F.idx differ diff --git a/.cache/clangd/index/quantization_operation.hpp.BD94143B871C7D8C.idx b/.cache/clangd/index/quantization_operation.hpp.BD94143B871C7D8C.idx new file mode 100755 index 0000000000000000000000000000000000000000..f61c5b0a61b6194911ca2edc909d95500c5922e0 Binary files /dev/null and b/.cache/clangd/index/quantization_operation.hpp.BD94143B871C7D8C.idx differ diff --git a/.cache/clangd/index/random_gen.hpp.B234A29FCA71839D.idx b/.cache/clangd/index/random_gen.hpp.B234A29FCA71839D.idx new file mode 100755 index 0000000000000000000000000000000000000000..217b12d2b98d2f086f8ad173b3f478b52edfe8ea Binary files /dev/null and b/.cache/clangd/index/random_gen.hpp.B234A29FCA71839D.idx differ diff --git a/.cache/clangd/index/ranges.hpp.66B5D2199BD23CD4.idx b/.cache/clangd/index/ranges.hpp.66B5D2199BD23CD4.idx new file mode 100755 index 0000000000000000000000000000000000000000..737d3ebf8d5ed6da0e74839ba20633a546450491 Binary files /dev/null and b/.cache/clangd/index/ranges.hpp.66B5D2199BD23CD4.idx differ diff --git a/.cache/clangd/index/reduce.hpp.7DC5A811A95F14CB.idx b/.cache/clangd/index/reduce.hpp.7DC5A811A95F14CB.idx new file mode 100755 index 0000000000000000000000000000000000000000..44429614138e5dcc9164703e873718a471c31e1b Binary files /dev/null and b/.cache/clangd/index/reduce.hpp.7DC5A811A95F14CB.idx differ diff --git a/.cache/clangd/index/reduce_blockwise.cpp.E231BF6C564BCCFD.idx b/.cache/clangd/index/reduce_blockwise.cpp.E231BF6C564BCCFD.idx new file mode 100755 index 0000000000000000000000000000000000000000..cf0ddca2e40c92a0e44e6ea1f9e00f27e14d9422 Binary files /dev/null and b/.cache/clangd/index/reduce_blockwise.cpp.E231BF6C564BCCFD.idx differ diff --git a/.cache/clangd/index/reduce_blockwise_impl.hpp.B38B56E3F9BFC70D.idx b/.cache/clangd/index/reduce_blockwise_impl.hpp.B38B56E3F9BFC70D.idx new file mode 100755 index 0000000000000000000000000000000000000000..080cb5f56f398ea203911d338e42ca530b3d5dcd Binary files /dev/null and b/.cache/clangd/index/reduce_blockwise_impl.hpp.B38B56E3F9BFC70D.idx differ diff --git a/.cache/clangd/index/reduce_blockwise_two_call.cpp.C62D3A32F608E374.idx b/.cache/clangd/index/reduce_blockwise_two_call.cpp.C62D3A32F608E374.idx new file mode 100755 index 0000000000000000000000000000000000000000..22bf457f3ce3e168af39becded0e328554a0cd10 Binary files /dev/null and b/.cache/clangd/index/reduce_blockwise_two_call.cpp.C62D3A32F608E374.idx differ diff --git a/.cache/clangd/index/reduce_example_common.hpp.A3D8D5EB42288D30.idx b/.cache/clangd/index/reduce_example_common.hpp.A3D8D5EB42288D30.idx new file mode 100755 index 0000000000000000000000000000000000000000..1c59e58464c13a7bf497af26145c66a39f2524e9 Binary files /dev/null and b/.cache/clangd/index/reduce_example_common.hpp.A3D8D5EB42288D30.idx differ diff --git a/.cache/clangd/index/reduce_multiblock_atomic_add.cpp.22DBB7B98E8ABAE4.idx b/.cache/clangd/index/reduce_multiblock_atomic_add.cpp.22DBB7B98E8ABAE4.idx new file mode 100755 index 0000000000000000000000000000000000000000..87d20412c8b1e23914074619b98a5c23dcc11df1 Binary files /dev/null and b/.cache/clangd/index/reduce_multiblock_atomic_add.cpp.22DBB7B98E8ABAE4.idx differ diff --git a/.cache/clangd/index/reduce_multiblock_atomic_add_impl.hpp.014F022BC601B7F9.idx b/.cache/clangd/index/reduce_multiblock_atomic_add_impl.hpp.014F022BC601B7F9.idx new file mode 100755 index 0000000000000000000000000000000000000000..89b1e0fb34d1809ea356b8a4dc620509b8d22a84 Binary files /dev/null and b/.cache/clangd/index/reduce_multiblock_atomic_add_impl.hpp.014F022BC601B7F9.idx differ diff --git a/.cache/clangd/index/reduce_no_index.cpp.3EDCFAA9D727E5B6.idx b/.cache/clangd/index/reduce_no_index.cpp.3EDCFAA9D727E5B6.idx new file mode 100755 index 0000000000000000000000000000000000000000..173881a446cef841c9f5e9d6b348852b703cf32e Binary files /dev/null and b/.cache/clangd/index/reduce_no_index.cpp.3EDCFAA9D727E5B6.idx differ diff --git a/.cache/clangd/index/reduce_with_index.cpp.F84EAB2F5A59285D.idx b/.cache/clangd/index/reduce_with_index.cpp.F84EAB2F5A59285D.idx new file mode 100755 index 0000000000000000000000000000000000000000..a165e1ee8e46bf1adab6ea5ce7ca735e2738a0b1 Binary files /dev/null and b/.cache/clangd/index/reduce_with_index.cpp.F84EAB2F5A59285D.idx differ diff --git a/.cache/clangd/index/reduction_common.hpp.C00F2970D4BF7FA3.idx b/.cache/clangd/index/reduction_common.hpp.C00F2970D4BF7FA3.idx new file mode 100755 index 0000000000000000000000000000000000000000..dd6f8bfcdfd9c03fdbbbde843884067bfd387912 Binary files /dev/null and b/.cache/clangd/index/reduction_common.hpp.C00F2970D4BF7FA3.idx differ diff --git a/.cache/clangd/index/reduction_enums.hpp.871AE7D90DC9E069.idx b/.cache/clangd/index/reduction_enums.hpp.871AE7D90DC9E069.idx new file mode 100755 index 0000000000000000000000000000000000000000..aa47ab062f577f04a9514c5e64c0a89bcaaa6072 Binary files /dev/null and b/.cache/clangd/index/reduction_enums.hpp.871AE7D90DC9E069.idx differ diff --git a/.cache/clangd/index/reduction_functions_accumulate.hpp.7D2C2CFC245627F0.idx b/.cache/clangd/index/reduction_functions_accumulate.hpp.7D2C2CFC245627F0.idx new file mode 100755 index 0000000000000000000000000000000000000000..6375331dde28186131401c51dd2c43a8c0af9fc9 Binary files /dev/null and b/.cache/clangd/index/reduction_functions_accumulate.hpp.7D2C2CFC245627F0.idx differ diff --git a/.cache/clangd/index/reduction_functions_blockwise.hpp.C958E7F57DF3365E.idx b/.cache/clangd/index/reduction_functions_blockwise.hpp.C958E7F57DF3365E.idx new file mode 100755 index 0000000000000000000000000000000000000000..3ce2b703e9397720545620d5e22821eb6dfe0d02 Binary files /dev/null and b/.cache/clangd/index/reduction_functions_blockwise.hpp.C958E7F57DF3365E.idx differ diff --git a/.cache/clangd/index/reduction_functions_threadwise.hpp.7A06A2856650B185.idx b/.cache/clangd/index/reduction_functions_threadwise.hpp.7A06A2856650B185.idx new file mode 100755 index 0000000000000000000000000000000000000000..997f46ae534e4515117d2456c8ac2cca72b20213 Binary files /dev/null and b/.cache/clangd/index/reduction_functions_threadwise.hpp.7A06A2856650B185.idx differ diff --git a/.cache/clangd/index/reduction_operator.hpp.28F266F9F2F32A7B.idx b/.cache/clangd/index/reduction_operator.hpp.28F266F9F2F32A7B.idx new file mode 100755 index 0000000000000000000000000000000000000000..fc9e0fe29b0730ca305690f09e463ad0e1ea8651 Binary files /dev/null and b/.cache/clangd/index/reduction_operator.hpp.28F266F9F2F32A7B.idx differ diff --git a/.cache/clangd/index/reduction_operator_mapping.hpp.8A11EE4B065A6CAE.idx b/.cache/clangd/index/reduction_operator_mapping.hpp.8A11EE4B065A6CAE.idx new file mode 100755 index 0000000000000000000000000000000000000000..0845ac410108dd8aea71db0b3fdb0054c50e5b7b Binary files /dev/null and b/.cache/clangd/index/reduction_operator_mapping.hpp.8A11EE4B065A6CAE.idx differ diff --git a/.cache/clangd/index/reference_avgpool_bwd.hpp.EF9EF399DB15462F.idx b/.cache/clangd/index/reference_avgpool_bwd.hpp.EF9EF399DB15462F.idx new file mode 100755 index 0000000000000000000000000000000000000000..4b0c748f9e4b37847f9b6796b2d37d3546dfc2b2 Binary files /dev/null and b/.cache/clangd/index/reference_avgpool_bwd.hpp.EF9EF399DB15462F.idx differ diff --git a/.cache/clangd/index/reference_batched_gemm.hpp.AF38237B103D733E.idx b/.cache/clangd/index/reference_batched_gemm.hpp.AF38237B103D733E.idx new file mode 100755 index 0000000000000000000000000000000000000000..fc7a61431d9f25a8f9fea6a6469502d7cca1f187 Binary files /dev/null and b/.cache/clangd/index/reference_batched_gemm.hpp.AF38237B103D733E.idx differ diff --git a/.cache/clangd/index/reference_batchnorm_backward.hpp.DB836B16A0EC29D6.idx b/.cache/clangd/index/reference_batchnorm_backward.hpp.DB836B16A0EC29D6.idx new file mode 100755 index 0000000000000000000000000000000000000000..0e02d9a8a0c0efb79972ef978861040897ac983c Binary files /dev/null and b/.cache/clangd/index/reference_batchnorm_backward.hpp.DB836B16A0EC29D6.idx differ diff --git a/.cache/clangd/index/reference_batchnorm_forward.hpp.0421D3C13A4F2108.idx b/.cache/clangd/index/reference_batchnorm_forward.hpp.0421D3C13A4F2108.idx new file mode 100755 index 0000000000000000000000000000000000000000..9959d7ff3272459e5e2fc6b62e007d228290f12a Binary files /dev/null and b/.cache/clangd/index/reference_batchnorm_forward.hpp.0421D3C13A4F2108.idx differ diff --git a/.cache/clangd/index/reference_batchnorm_infer.hpp.B110A1DBC8883C05.idx b/.cache/clangd/index/reference_batchnorm_infer.hpp.B110A1DBC8883C05.idx new file mode 100755 index 0000000000000000000000000000000000000000..0f0823870227d8473b4ad01ee59a1d20fc1f5662 Binary files /dev/null and b/.cache/clangd/index/reference_batchnorm_infer.hpp.B110A1DBC8883C05.idx differ diff --git a/.cache/clangd/index/reference_cgemm.hpp.9B0EE32433381722.idx b/.cache/clangd/index/reference_cgemm.hpp.9B0EE32433381722.idx new file mode 100755 index 0000000000000000000000000000000000000000..3cbf35ab101ae558208f5573ad7840183ab1047a Binary files /dev/null and b/.cache/clangd/index/reference_cgemm.hpp.9B0EE32433381722.idx differ diff --git a/.cache/clangd/index/reference_column_to_image.hpp.B1B86724AE64837E.idx b/.cache/clangd/index/reference_column_to_image.hpp.B1B86724AE64837E.idx new file mode 100755 index 0000000000000000000000000000000000000000..1335c220bf6c822cc6c9449cf13519908e71378d Binary files /dev/null and b/.cache/clangd/index/reference_column_to_image.hpp.B1B86724AE64837E.idx differ diff --git a/.cache/clangd/index/reference_contraction.hpp.4808E834C3E32AEB.idx b/.cache/clangd/index/reference_contraction.hpp.4808E834C3E32AEB.idx new file mode 100755 index 0000000000000000000000000000000000000000..19acb98c6386220709cb3c7d012342b8c54711a2 Binary files /dev/null and b/.cache/clangd/index/reference_contraction.hpp.4808E834C3E32AEB.idx differ diff --git a/.cache/clangd/index/reference_conv_bwd_data.hpp.7B56299251FBE761.idx b/.cache/clangd/index/reference_conv_bwd_data.hpp.7B56299251FBE761.idx new file mode 100755 index 0000000000000000000000000000000000000000..e902493090cfac8e62d2355a2f0d09da83634b35 Binary files /dev/null and b/.cache/clangd/index/reference_conv_bwd_data.hpp.7B56299251FBE761.idx differ diff --git a/.cache/clangd/index/reference_conv_bwd_weight.hpp.474E92061BE55B30.idx b/.cache/clangd/index/reference_conv_bwd_weight.hpp.474E92061BE55B30.idx new file mode 100755 index 0000000000000000000000000000000000000000..77a186319956c71237dbe6d57e91eeda692a12a1 Binary files /dev/null and b/.cache/clangd/index/reference_conv_bwd_weight.hpp.474E92061BE55B30.idx differ diff --git a/.cache/clangd/index/reference_conv_fwd.cpp.3703F6A90F557994.idx b/.cache/clangd/index/reference_conv_fwd.cpp.3703F6A90F557994.idx new file mode 100755 index 0000000000000000000000000000000000000000..9d1db7cf73064edbdb7114eb30b7dfc037967f21 Binary files /dev/null and b/.cache/clangd/index/reference_conv_fwd.cpp.3703F6A90F557994.idx differ diff --git a/.cache/clangd/index/reference_conv_fwd.hpp.4266D54C9E6D337C.idx b/.cache/clangd/index/reference_conv_fwd.hpp.4266D54C9E6D337C.idx new file mode 100755 index 0000000000000000000000000000000000000000..b3c0741b526e37f7b9bde09b144c2fcd269746a1 Binary files /dev/null and b/.cache/clangd/index/reference_conv_fwd.hpp.4266D54C9E6D337C.idx differ diff --git a/.cache/clangd/index/reference_conv_fwd_bias_activation.hpp.CC5C1A67067C17BD.idx b/.cache/clangd/index/reference_conv_fwd_bias_activation.hpp.CC5C1A67067C17BD.idx new file mode 100755 index 0000000000000000000000000000000000000000..97991a850b0e233c6b25a4624c26b549d4894207 Binary files /dev/null and b/.cache/clangd/index/reference_conv_fwd_bias_activation.hpp.CC5C1A67067C17BD.idx differ diff --git a/.cache/clangd/index/reference_conv_fwd_bias_activation_add.hpp.05EAEEBBFD920597.idx b/.cache/clangd/index/reference_conv_fwd_bias_activation_add.hpp.05EAEEBBFD920597.idx new file mode 100755 index 0000000000000000000000000000000000000000..a8d304c8b319cb7d95b82cb81222f954914d2be3 Binary files /dev/null and b/.cache/clangd/index/reference_conv_fwd_bias_activation_add.hpp.05EAEEBBFD920597.idx differ diff --git a/.cache/clangd/index/reference_gemm.hpp.0A74F0F3AFBAC0AB.idx b/.cache/clangd/index/reference_gemm.hpp.0A74F0F3AFBAC0AB.idx new file mode 100755 index 0000000000000000000000000000000000000000..9d3d50d607cbf09a6400d9afca340a0abbf470fe Binary files /dev/null and b/.cache/clangd/index/reference_gemm.hpp.0A74F0F3AFBAC0AB.idx differ diff --git a/.cache/clangd/index/reference_gemm_layernorm.hpp.9AA4C354D9D24BEC.idx b/.cache/clangd/index/reference_gemm_layernorm.hpp.9AA4C354D9D24BEC.idx new file mode 100755 index 0000000000000000000000000000000000000000..9e7ec451dd5d5ec89f1735b61f425b61080520c8 Binary files /dev/null and b/.cache/clangd/index/reference_gemm_layernorm.hpp.9AA4C354D9D24BEC.idx differ diff --git a/.cache/clangd/index/reference_groupnorm.hpp.368D3290BA70398F.idx b/.cache/clangd/index/reference_groupnorm.hpp.368D3290BA70398F.idx new file mode 100755 index 0000000000000000000000000000000000000000..6fcc433bc031a43ea269a6e879a7b8bb8876fdbf Binary files /dev/null and b/.cache/clangd/index/reference_groupnorm.hpp.368D3290BA70398F.idx differ diff --git a/.cache/clangd/index/reference_groupnorm_bwd.hpp.C24505C3C5BFB298.idx b/.cache/clangd/index/reference_groupnorm_bwd.hpp.C24505C3C5BFB298.idx new file mode 100755 index 0000000000000000000000000000000000000000..6720ddc84188374dddc355634ae30bbcdd64740d Binary files /dev/null and b/.cache/clangd/index/reference_groupnorm_bwd.hpp.C24505C3C5BFB298.idx differ diff --git a/.cache/clangd/index/reference_image_to_column.hpp.14F89CCD7031E508.idx b/.cache/clangd/index/reference_image_to_column.hpp.14F89CCD7031E508.idx new file mode 100755 index 0000000000000000000000000000000000000000..6b59f0ac40fe8d868f2c1631e3ed055f0308fd10 Binary files /dev/null and b/.cache/clangd/index/reference_image_to_column.hpp.14F89CCD7031E508.idx differ diff --git a/.cache/clangd/index/reference_layernorm.hpp.B9C72CF73E810635.idx b/.cache/clangd/index/reference_layernorm.hpp.B9C72CF73E810635.idx new file mode 100755 index 0000000000000000000000000000000000000000..f3f3e04ba3ab0d05067ab685d9c28ef0de1a4092 Binary files /dev/null and b/.cache/clangd/index/reference_layernorm.hpp.B9C72CF73E810635.idx differ diff --git a/.cache/clangd/index/reference_layernorm_bwd.hpp.F0026FDE37931C67.idx b/.cache/clangd/index/reference_layernorm_bwd.hpp.F0026FDE37931C67.idx new file mode 100755 index 0000000000000000000000000000000000000000..bc1371b4b6be270bc44bd85897b2ea92a3e00e71 Binary files /dev/null and b/.cache/clangd/index/reference_layernorm_bwd.hpp.F0026FDE37931C67.idx differ diff --git a/.cache/clangd/index/reference_maxpool_bwd.hpp.AA4F2921ED8F7A67.idx b/.cache/clangd/index/reference_maxpool_bwd.hpp.AA4F2921ED8F7A67.idx new file mode 100755 index 0000000000000000000000000000000000000000..77b6d6339a46025d5e4663551892750ab187e5c7 Binary files /dev/null and b/.cache/clangd/index/reference_maxpool_bwd.hpp.AA4F2921ED8F7A67.idx differ diff --git a/.cache/clangd/index/reference_pool_fwd.hpp.3BB9B8E57191E80A.idx b/.cache/clangd/index/reference_pool_fwd.hpp.3BB9B8E57191E80A.idx new file mode 100755 index 0000000000000000000000000000000000000000..2edc9661292e493e9a0d0ae64723e1bb0cabef7f Binary files /dev/null and b/.cache/clangd/index/reference_pool_fwd.hpp.3BB9B8E57191E80A.idx differ diff --git a/.cache/clangd/index/reference_reduce.hpp.0C04C3B5F65B2DFD.idx b/.cache/clangd/index/reference_reduce.hpp.0C04C3B5F65B2DFD.idx new file mode 100755 index 0000000000000000000000000000000000000000..b25d5edb8f3606ec2fbfa8752960f6fe0eb638e4 Binary files /dev/null and b/.cache/clangd/index/reference_reduce.hpp.0C04C3B5F65B2DFD.idx differ diff --git a/.cache/clangd/index/reference_softmax.hpp.4C7847F5A451A485.idx b/.cache/clangd/index/reference_softmax.hpp.4C7847F5A451A485.idx new file mode 100755 index 0000000000000000000000000000000000000000..a7ff00d725127caac16e457b88146978c3bcced2 Binary files /dev/null and b/.cache/clangd/index/reference_softmax.hpp.4C7847F5A451A485.idx differ diff --git a/.cache/clangd/index/reference_sparse_embedding3_forward_layernorm.hpp.495E2A0ED186B796.idx b/.cache/clangd/index/reference_sparse_embedding3_forward_layernorm.hpp.495E2A0ED186B796.idx new file mode 100755 index 0000000000000000000000000000000000000000..8bbb9e432389b79798606abd641556242cb09d53 Binary files /dev/null and b/.cache/clangd/index/reference_sparse_embedding3_forward_layernorm.hpp.495E2A0ED186B796.idx differ diff --git a/.cache/clangd/index/run_batched_gemm_example.inc.341E18570766CD6C.idx b/.cache/clangd/index/run_batched_gemm_example.inc.341E18570766CD6C.idx new file mode 100755 index 0000000000000000000000000000000000000000..d5c74b037668132dd3edd9b007e419f2a66f4263 Binary files /dev/null and b/.cache/clangd/index/run_batched_gemm_example.inc.341E18570766CD6C.idx differ diff --git a/.cache/clangd/index/run_batched_gemm_gemm_example.inc.B0C303C33F007446.idx b/.cache/clangd/index/run_batched_gemm_gemm_example.inc.B0C303C33F007446.idx new file mode 100755 index 0000000000000000000000000000000000000000..904c3f9388265265e14f30b57410f678c47c880d Binary files /dev/null and b/.cache/clangd/index/run_batched_gemm_gemm_example.inc.B0C303C33F007446.idx differ diff --git a/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm.inc.35534F29FB76E4C9.idx b/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm.inc.35534F29FB76E4C9.idx new file mode 100755 index 0000000000000000000000000000000000000000..24757b57084b4d6217be648b97b704f5b8eb258b Binary files /dev/null and b/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm.inc.35534F29FB76E4C9.idx differ diff --git a/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm_permute.inc.DF3E33E757137FA7.idx b/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm_permute.inc.DF3E33E757137FA7.idx new file mode 100755 index 0000000000000000000000000000000000000000..58c5a11015338b3006f4efe6c1a0b66f52e77bbb Binary files /dev/null and b/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm_permute.inc.DF3E33E757137FA7.idx differ diff --git a/.cache/clangd/index/run_contraction_bilinear_example.inc.427C55016011CF17.idx b/.cache/clangd/index/run_contraction_bilinear_example.inc.427C55016011CF17.idx new file mode 100755 index 0000000000000000000000000000000000000000..30da03eb6eb7be652c2aaefe998a676d2e2c5b35 Binary files /dev/null and b/.cache/clangd/index/run_contraction_bilinear_example.inc.427C55016011CF17.idx differ diff --git a/.cache/clangd/index/run_contraction_scale_example.inc.619AD6EEB2674ADA.idx b/.cache/clangd/index/run_contraction_scale_example.inc.619AD6EEB2674ADA.idx new file mode 100755 index 0000000000000000000000000000000000000000..cd3f3bec009a8543aecb1321a4678ad7b2a5c922 Binary files /dev/null and b/.cache/clangd/index/run_contraction_scale_example.inc.619AD6EEB2674ADA.idx differ diff --git a/.cache/clangd/index/run_conv2d_fwd_bias_perchannel_quantization_example.inc.453F2B26B4A84A7C.idx b/.cache/clangd/index/run_conv2d_fwd_bias_perchannel_quantization_example.inc.453F2B26B4A84A7C.idx new file mode 100755 index 0000000000000000000000000000000000000000..38c38a364559d5d1f432e3f0467b0b5256909c36 Binary files /dev/null and b/.cache/clangd/index/run_conv2d_fwd_bias_perchannel_quantization_example.inc.453F2B26B4A84A7C.idx differ diff --git a/.cache/clangd/index/run_conv2d_fwd_bias_perlayer_quantization_example.inc.41E3948246A8DC23.idx b/.cache/clangd/index/run_conv2d_fwd_bias_perlayer_quantization_example.inc.41E3948246A8DC23.idx new file mode 100755 index 0000000000000000000000000000000000000000..3805dff001680aa346f68e6ffc149e3c37f17353 Binary files /dev/null and b/.cache/clangd/index/run_conv2d_fwd_bias_perlayer_quantization_example.inc.41E3948246A8DC23.idx differ diff --git a/.cache/clangd/index/run_conv2d_fwd_perchannel_quantization_example.inc.4B57FDA755059955.idx b/.cache/clangd/index/run_conv2d_fwd_perchannel_quantization_example.inc.4B57FDA755059955.idx new file mode 100755 index 0000000000000000000000000000000000000000..9a5ce3c3d5e16cfbb98c4038734d576361803d78 Binary files /dev/null and b/.cache/clangd/index/run_conv2d_fwd_perchannel_quantization_example.inc.4B57FDA755059955.idx differ diff --git a/.cache/clangd/index/run_conv2d_fwd_perlayer_quantization_example.inc.3D388FA1A8D19B72.idx b/.cache/clangd/index/run_conv2d_fwd_perlayer_quantization_example.inc.3D388FA1A8D19B72.idx new file mode 100755 index 0000000000000000000000000000000000000000..55a32555569d9ef71d1d3e3389e85129d7ef0c52 Binary files /dev/null and b/.cache/clangd/index/run_conv2d_fwd_perlayer_quantization_example.inc.3D388FA1A8D19B72.idx differ diff --git a/.cache/clangd/index/run_convnd_activ_example.inc.F2FB9ED6F9BA4B8A.idx b/.cache/clangd/index/run_convnd_activ_example.inc.F2FB9ED6F9BA4B8A.idx new file mode 100755 index 0000000000000000000000000000000000000000..2c90c4a8a9a3e4d708066bfc33b1a1aa5bf40d83 Binary files /dev/null and b/.cache/clangd/index/run_convnd_activ_example.inc.F2FB9ED6F9BA4B8A.idx differ diff --git a/.cache/clangd/index/run_convnd_fwd_example.inc.6C6FF11BF5A73D93.idx b/.cache/clangd/index/run_convnd_fwd_example.inc.6C6FF11BF5A73D93.idx new file mode 100755 index 0000000000000000000000000000000000000000..6c2e11a0c444142d28dc61e986cf558749dfc478 Binary files /dev/null and b/.cache/clangd/index/run_convnd_fwd_example.inc.6C6FF11BF5A73D93.idx differ diff --git a/.cache/clangd/index/run_convnd_fwd_max_example.inc.4C55B2EACEAC1C07.idx b/.cache/clangd/index/run_convnd_fwd_max_example.inc.4C55B2EACEAC1C07.idx new file mode 100755 index 0000000000000000000000000000000000000000..18b6d7116683392e924cb6f3eb933c2f2806d599 Binary files /dev/null and b/.cache/clangd/index/run_convnd_fwd_max_example.inc.4C55B2EACEAC1C07.idx differ diff --git a/.cache/clangd/index/run_gemm_add_add_fastgelu_example.inc.E8AFCCD92C8D9967.idx b/.cache/clangd/index/run_gemm_add_add_fastgelu_example.inc.E8AFCCD92C8D9967.idx new file mode 100755 index 0000000000000000000000000000000000000000..b762d577797145913ee694d80e6a6ddeaa6af398 Binary files /dev/null and b/.cache/clangd/index/run_gemm_add_add_fastgelu_example.inc.E8AFCCD92C8D9967.idx differ diff --git a/.cache/clangd/index/run_gemm_add_multiply_example.inc.3C87D5BB1B980F33.idx b/.cache/clangd/index/run_gemm_add_multiply_example.inc.3C87D5BB1B980F33.idx new file mode 100755 index 0000000000000000000000000000000000000000..93c15d941ceba4c11199e5c3b244f706a6a417a0 Binary files /dev/null and b/.cache/clangd/index/run_gemm_add_multiply_example.inc.3C87D5BB1B980F33.idx differ diff --git a/.cache/clangd/index/run_gemm_example.inc.4868A822F4677354.idx b/.cache/clangd/index/run_gemm_example.inc.4868A822F4677354.idx new file mode 100755 index 0000000000000000000000000000000000000000..96da48915bc0f269792f9a1f8fb76505afe7250e Binary files /dev/null and b/.cache/clangd/index/run_gemm_example.inc.4868A822F4677354.idx differ diff --git a/.cache/clangd/index/run_gemm_test.inc.B7894CAD83BCCCF5.idx b/.cache/clangd/index/run_gemm_test.inc.B7894CAD83BCCCF5.idx new file mode 100755 index 0000000000000000000000000000000000000000..fc965a4f57429a883e279c1810d40af968ba75e3 Binary files /dev/null and b/.cache/clangd/index/run_gemm_test.inc.B7894CAD83BCCCF5.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_bwd_data_bias_relu_example.inc.1D7F06778A26066D.idx b/.cache/clangd/index/run_grouped_conv_bwd_data_bias_relu_example.inc.1D7F06778A26066D.idx new file mode 100755 index 0000000000000000000000000000000000000000..ae56482ff0a3d73c515d6d35696b37d3bbe324cb Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_bwd_data_bias_relu_example.inc.1D7F06778A26066D.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_bwd_data_example.inc.11E97FA73102DBD5.idx b/.cache/clangd/index/run_grouped_conv_bwd_data_example.inc.11E97FA73102DBD5.idx new file mode 100755 index 0000000000000000000000000000000000000000..1c6903acd60c713d493c4372535238e1be39bb7b Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_bwd_data_example.inc.11E97FA73102DBD5.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_bwd_weight_example.inc.EE2E4EE0E60E4D81.idx b/.cache/clangd/index/run_grouped_conv_bwd_weight_example.inc.EE2E4EE0E60E4D81.idx new file mode 100755 index 0000000000000000000000000000000000000000..f62070f5bc3e738a40f04d3dae4e138b97ffdc96 Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_bwd_weight_example.inc.EE2E4EE0E60E4D81.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_conv_fwd_example.inc.88B5CE7F22532219.idx b/.cache/clangd/index/run_grouped_conv_conv_fwd_example.inc.88B5CE7F22532219.idx new file mode 100755 index 0000000000000000000000000000000000000000..4a425ad628181a269ada19cebaa5ad1b678fcaef Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_conv_fwd_example.inc.88B5CE7F22532219.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_fwd_bias_relu_add_example.inc.7B0A8630D499DF28.idx b/.cache/clangd/index/run_grouped_conv_fwd_bias_relu_add_example.inc.7B0A8630D499DF28.idx new file mode 100755 index 0000000000000000000000000000000000000000..15f5e058e020723a7feba71cd29a4dc948bdfec1 Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_fwd_bias_relu_add_example.inc.7B0A8630D499DF28.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_fwd_example.inc.C65A743D8AABBB7C.idx b/.cache/clangd/index/run_grouped_conv_fwd_example.inc.C65A743D8AABBB7C.idx new file mode 100755 index 0000000000000000000000000000000000000000..c17461c5ad93d50763598858699fe37a66a52e5e Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_fwd_example.inc.C65A743D8AABBB7C.idx differ diff --git a/.cache/clangd/index/run_grouped_gemm_example.inc.FE653C75BFA23618.idx b/.cache/clangd/index/run_grouped_gemm_example.inc.FE653C75BFA23618.idx new file mode 100755 index 0000000000000000000000000000000000000000..5782d15fad14efb113f459a540a2175521b74843 Binary files /dev/null and b/.cache/clangd/index/run_grouped_gemm_example.inc.FE653C75BFA23618.idx differ diff --git a/.cache/clangd/index/run_grouped_gemm_scale_softmax_gemm_permute.inc.0CAE0C43C07E96CE.idx b/.cache/clangd/index/run_grouped_gemm_scale_softmax_gemm_permute.inc.0CAE0C43C07E96CE.idx new file mode 100755 index 0000000000000000000000000000000000000000..5b78a1f6b9d054f8aeaec101391eccf112d3be91 Binary files /dev/null and b/.cache/clangd/index/run_grouped_gemm_scale_softmax_gemm_permute.inc.0CAE0C43C07E96CE.idx differ diff --git a/.cache/clangd/index/run_groupnorm_fwd_example.inc.85BD21F884D2FB4A.idx b/.cache/clangd/index/run_groupnorm_fwd_example.inc.85BD21F884D2FB4A.idx new file mode 100755 index 0000000000000000000000000000000000000000..f885b602dd2455bb90e3a160b64ba971a21052cc Binary files /dev/null and b/.cache/clangd/index/run_groupnorm_fwd_example.inc.85BD21F884D2FB4A.idx differ diff --git a/.cache/clangd/index/run_layernorm4d_fwd_example.inc.8870D57CA6553449.idx b/.cache/clangd/index/run_layernorm4d_fwd_example.inc.8870D57CA6553449.idx new file mode 100755 index 0000000000000000000000000000000000000000..c6ce91b106b3f9fe1b1fb9336b8ad3142f8debe8 Binary files /dev/null and b/.cache/clangd/index/run_layernorm4d_fwd_example.inc.8870D57CA6553449.idx differ diff --git a/.cache/clangd/index/run_layernorm_example.inc.DA0460DDD4EF632F.idx b/.cache/clangd/index/run_layernorm_example.inc.DA0460DDD4EF632F.idx new file mode 100755 index 0000000000000000000000000000000000000000..93845507d106fdb92e26a4753146c8ab2c94d9e3 Binary files /dev/null and b/.cache/clangd/index/run_layernorm_example.inc.DA0460DDD4EF632F.idx differ diff --git a/.cache/clangd/index/run_permute_bundle_example.inc.419BC377BBAE0EEA.idx b/.cache/clangd/index/run_permute_bundle_example.inc.419BC377BBAE0EEA.idx new file mode 100755 index 0000000000000000000000000000000000000000..4a84c04a1c114259904713f17d38b970442de7e5 Binary files /dev/null and b/.cache/clangd/index/run_permute_bundle_example.inc.419BC377BBAE0EEA.idx differ diff --git a/.cache/clangd/index/run_permute_element_example.inc.42CC4C3518B6F771.idx b/.cache/clangd/index/run_permute_element_example.inc.42CC4C3518B6F771.idx new file mode 100755 index 0000000000000000000000000000000000000000..94fa1311cfbd102fcc0959c197dd5d7b6cca2989 Binary files /dev/null and b/.cache/clangd/index/run_permute_element_example.inc.42CC4C3518B6F771.idx differ diff --git a/.cache/clangd/index/run_splitK_gemm_example.inc.51FAC96E5EFEA0F0.idx b/.cache/clangd/index/run_splitK_gemm_example.inc.51FAC96E5EFEA0F0.idx new file mode 100755 index 0000000000000000000000000000000000000000..1ddffcde2042374734d7bcea3205297065da3f91 Binary files /dev/null and b/.cache/clangd/index/run_splitK_gemm_example.inc.51FAC96E5EFEA0F0.idx differ diff --git a/.cache/clangd/index/sequence.hpp.25AC5091073FF3EB.idx b/.cache/clangd/index/sequence.hpp.25AC5091073FF3EB.idx new file mode 100755 index 0000000000000000000000000000000000000000..d23d93d8d3e7852371c7e46c2a2c23184ab5e132 Binary files /dev/null and b/.cache/clangd/index/sequence.hpp.25AC5091073FF3EB.idx differ diff --git a/.cache/clangd/index/sequence_helper.hpp.2282D53015E164C0.idx b/.cache/clangd/index/sequence_helper.hpp.2282D53015E164C0.idx new file mode 100755 index 0000000000000000000000000000000000000000..08824b8fba7759a5bbd3c571167333dff85fcee7 Binary files /dev/null and b/.cache/clangd/index/sequence_helper.hpp.2282D53015E164C0.idx differ diff --git a/.cache/clangd/index/softmax.hpp.2F97791F40A73FC9.idx b/.cache/clangd/index/softmax.hpp.2F97791F40A73FC9.idx new file mode 100755 index 0000000000000000000000000000000000000000..ff5a874c69a9ff2f3c61aa48cb3159e162356895 Binary files /dev/null and b/.cache/clangd/index/softmax.hpp.2F97791F40A73FC9.idx differ diff --git a/.cache/clangd/index/softmax_blockwise.cpp.3EE65033A8858975.idx b/.cache/clangd/index/softmax_blockwise.cpp.3EE65033A8858975.idx new file mode 100755 index 0000000000000000000000000000000000000000..4653326c5413bc1dfa79c197f16103e96ffdc114 Binary files /dev/null and b/.cache/clangd/index/softmax_blockwise.cpp.3EE65033A8858975.idx differ diff --git a/.cache/clangd/index/space_filling_curve.cpp.EB322F6A66137D22.idx b/.cache/clangd/index/space_filling_curve.cpp.EB322F6A66137D22.idx new file mode 100755 index 0000000000000000000000000000000000000000..84f616a5d324e8eb8cebc202fdd48b4e9ead6c64 Binary files /dev/null and b/.cache/clangd/index/space_filling_curve.cpp.EB322F6A66137D22.idx differ diff --git a/.cache/clangd/index/span.hpp.D6316D1D55F0830C.idx b/.cache/clangd/index/span.hpp.D6316D1D55F0830C.idx new file mode 100755 index 0000000000000000000000000000000000000000..a7494073071b4b975f238b58e16552aed6501c77 Binary files /dev/null and b/.cache/clangd/index/span.hpp.D6316D1D55F0830C.idx differ diff --git a/.cache/clangd/index/sparse_embedding3_forward_layernorm.cpp.CF5214A930A81517.idx b/.cache/clangd/index/sparse_embedding3_forward_layernorm.cpp.CF5214A930A81517.idx new file mode 100755 index 0000000000000000000000000000000000000000..63d2b0df5697604cb26960f59663bfb5dd59bd96 Binary files /dev/null and b/.cache/clangd/index/sparse_embedding3_forward_layernorm.cpp.CF5214A930A81517.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_bf16.cpp.AB98F65366A9F1A9.idx b/.cache/clangd/index/splitK_gemm_xdl_bf16.cpp.AB98F65366A9F1A9.idx new file mode 100755 index 0000000000000000000000000000000000000000..9c28e906b8eaeef1784ff97682bc53f94e38bdcd Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_bf16.cpp.AB98F65366A9F1A9.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_fp16.cpp.DBCD688A2F933855.idx b/.cache/clangd/index/splitK_gemm_xdl_fp16.cpp.DBCD688A2F933855.idx new file mode 100755 index 0000000000000000000000000000000000000000..d41899de2ba53e0fed8019152d0447fbc2b4a1af Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_fp16.cpp.DBCD688A2F933855.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_fp16_fp8.cpp.60034E8F6043ABD9.idx b/.cache/clangd/index/splitK_gemm_xdl_fp16_fp8.cpp.60034E8F6043ABD9.idx new file mode 100755 index 0000000000000000000000000000000000000000..3506d242fd6919d3e5473f6ffcf35dce21e62f49 Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_fp16_fp8.cpp.60034E8F6043ABD9.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_fp32.cpp.A3B8F8ECA755E6B2.idx b/.cache/clangd/index/splitK_gemm_xdl_fp32.cpp.A3B8F8ECA755E6B2.idx new file mode 100755 index 0000000000000000000000000000000000000000..6bfd4425b355fe5015feba8b0a500f4d55dacc18 Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_fp32.cpp.A3B8F8ECA755E6B2.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_int8.cpp.4600C416992C0160.idx b/.cache/clangd/index/splitK_gemm_xdl_int8.cpp.4600C416992C0160.idx new file mode 100755 index 0000000000000000000000000000000000000000..dcba2b253b5da8943b1ada169f4638e9a3195e79 Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_int8.cpp.4600C416992C0160.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_lds_direct_load_fp16.cpp.2D633E2EB00CCD78.idx b/.cache/clangd/index/splitK_gemm_xdl_lds_direct_load_fp16.cpp.2D633E2EB00CCD78.idx new file mode 100755 index 0000000000000000000000000000000000000000..2b8f7f1545495dd497bf96bc990b162660676117 Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_lds_direct_load_fp16.cpp.2D633E2EB00CCD78.idx differ diff --git a/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp16.cpp.3597E35811A1130C.idx b/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp16.cpp.3597E35811A1130C.idx new file mode 100755 index 0000000000000000000000000000000000000000..d18dbd1b76943382064a67fdd22034d7d7ee4a33 Binary files /dev/null and b/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp16.cpp.3597E35811A1130C.idx differ diff --git a/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp32.cpp.8A4C951A87984080.idx b/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp32.cpp.8A4C951A87984080.idx new file mode 100755 index 0000000000000000000000000000000000000000..79ce2caa39e81bd6c1fa560865ad26b99a7542d7 Binary files /dev/null and b/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp32.cpp.8A4C951A87984080.idx differ diff --git a/.cache/clangd/index/static_buffer.hpp.E059C0D43CFA4618.idx b/.cache/clangd/index/static_buffer.hpp.E059C0D43CFA4618.idx new file mode 100755 index 0000000000000000000000000000000000000000..679748e6a592339c3a13effe42583ff922321071 Binary files /dev/null and b/.cache/clangd/index/static_buffer.hpp.E059C0D43CFA4618.idx differ diff --git a/.cache/clangd/index/static_tensor.hpp.55059299885EB585.idx b/.cache/clangd/index/static_tensor.hpp.55059299885EB585.idx new file mode 100755 index 0000000000000000000000000000000000000000..27ada298945d2cb7bf610f7622a5d617a25cd92a Binary files /dev/null and b/.cache/clangd/index/static_tensor.hpp.55059299885EB585.idx differ diff --git a/.cache/clangd/index/statically_indexed_array.hpp.8A80A7A4CC47B954.idx b/.cache/clangd/index/statically_indexed_array.hpp.8A80A7A4CC47B954.idx new file mode 100755 index 0000000000000000000000000000000000000000..4e71ab4ae2b9950d038ff9eb3da87b0e1460212b Binary files /dev/null and b/.cache/clangd/index/statically_indexed_array.hpp.8A80A7A4CC47B954.idx differ diff --git a/.cache/clangd/index/statically_indexed_array_multi_index.hpp.E363636635FF265A.idx b/.cache/clangd/index/statically_indexed_array_multi_index.hpp.E363636635FF265A.idx new file mode 100755 index 0000000000000000000000000000000000000000..b938c546048b9dea2700c3874a28e58ad2867ff9 Binary files /dev/null and b/.cache/clangd/index/statically_indexed_array_multi_index.hpp.E363636635FF265A.idx differ diff --git a/.cache/clangd/index/stream_config.hpp.015AF40CD82DD077.idx b/.cache/clangd/index/stream_config.hpp.015AF40CD82DD077.idx new file mode 100755 index 0000000000000000000000000000000000000000..00718ad8b7637f281e142ec29475f54ac79a939b Binary files /dev/null and b/.cache/clangd/index/stream_config.hpp.015AF40CD82DD077.idx differ diff --git a/.cache/clangd/index/stream_utility.hpp.7DD00DAAC010212E.idx b/.cache/clangd/index/stream_utility.hpp.7DD00DAAC010212E.idx new file mode 100755 index 0000000000000000000000000000000000000000..e44387cf8f06b1407ad5b0f2d2e44872cca20d3c Binary files /dev/null and b/.cache/clangd/index/stream_utility.hpp.7DD00DAAC010212E.idx differ diff --git a/.cache/clangd/index/synchronization.hpp.94B924A949B60CF0.idx b/.cache/clangd/index/synchronization.hpp.94B924A949B60CF0.idx new file mode 100755 index 0000000000000000000000000000000000000000..978a463d320b51ad5e088fbeccfc4d005141585c Binary files /dev/null and b/.cache/clangd/index/synchronization.hpp.94B924A949B60CF0.idx differ diff --git a/.cache/clangd/index/tensor.hpp.C84FDD70C53B91FD.idx b/.cache/clangd/index/tensor.hpp.C84FDD70C53B91FD.idx new file mode 100755 index 0000000000000000000000000000000000000000..06c01cec006740d5890094eeaa27e1043c3a5f33 Binary files /dev/null and b/.cache/clangd/index/tensor.hpp.C84FDD70C53B91FD.idx differ diff --git a/.cache/clangd/index/tensor_adaptor.hpp.1AABBD498E3DDABA.idx b/.cache/clangd/index/tensor_adaptor.hpp.1AABBD498E3DDABA.idx new file mode 100755 index 0000000000000000000000000000000000000000..78e734459e123eb4e1943df784e372d615232b04 Binary files /dev/null and b/.cache/clangd/index/tensor_adaptor.hpp.1AABBD498E3DDABA.idx differ diff --git a/.cache/clangd/index/tensor_descriptor.hpp.7E73ABD05058119F.idx b/.cache/clangd/index/tensor_descriptor.hpp.7E73ABD05058119F.idx new file mode 100755 index 0000000000000000000000000000000000000000..50e2c601b672f7a4d9f9b165a4badb46147bb62c Binary files /dev/null and b/.cache/clangd/index/tensor_descriptor.hpp.7E73ABD05058119F.idx differ diff --git a/.cache/clangd/index/tensor_descriptor_helper.hpp.B97BC6F42C53AF70.idx b/.cache/clangd/index/tensor_descriptor_helper.hpp.B97BC6F42C53AF70.idx new file mode 100755 index 0000000000000000000000000000000000000000..5f4f3ae090bf030fbb4d66a57bf9c4d698d14451 Binary files /dev/null and b/.cache/clangd/index/tensor_descriptor_helper.hpp.B97BC6F42C53AF70.idx differ diff --git a/.cache/clangd/index/tensor_layout.hpp.83E496410F43D7E9.idx b/.cache/clangd/index/tensor_layout.hpp.83E496410F43D7E9.idx new file mode 100755 index 0000000000000000000000000000000000000000..413add1ed1b447b1b6824c3a58fb8a898e2b9ecc Binary files /dev/null and b/.cache/clangd/index/tensor_layout.hpp.83E496410F43D7E9.idx differ diff --git a/.cache/clangd/index/tensor_partition.hpp.2F54ACF04D747753.idx b/.cache/clangd/index/tensor_partition.hpp.2F54ACF04D747753.idx new file mode 100755 index 0000000000000000000000000000000000000000..c0acbb0cfd90af28f89e512a04a6f3e3d0df0c78 Binary files /dev/null and b/.cache/clangd/index/tensor_partition.hpp.2F54ACF04D747753.idx differ diff --git a/.cache/clangd/index/tensor_space_filling_curve.hpp.058953EFBD7A2B25.idx b/.cache/clangd/index/tensor_space_filling_curve.hpp.058953EFBD7A2B25.idx new file mode 100755 index 0000000000000000000000000000000000000000..ad114cdc236cffbce2ed061784f424edf16c6623 Binary files /dev/null and b/.cache/clangd/index/tensor_space_filling_curve.hpp.058953EFBD7A2B25.idx differ diff --git a/.cache/clangd/index/tensor_specialization.hpp.5BC1AF8D88F729F2.idx b/.cache/clangd/index/tensor_specialization.hpp.5BC1AF8D88F729F2.idx new file mode 100755 index 0000000000000000000000000000000000000000..d113e54a34d1008908627ed5bedaeca7ed6197e6 Binary files /dev/null and b/.cache/clangd/index/tensor_specialization.hpp.5BC1AF8D88F729F2.idx differ diff --git a/.cache/clangd/index/tensor_utils.hpp.B92D33FF5A9EED5B.idx b/.cache/clangd/index/tensor_utils.hpp.B92D33FF5A9EED5B.idx new file mode 100755 index 0000000000000000000000000000000000000000..5bdd303497dd37ecec45f79b2015afcb51f30e06 Binary files /dev/null and b/.cache/clangd/index/tensor_utils.hpp.B92D33FF5A9EED5B.idx differ diff --git a/.cache/clangd/index/test_avg_pool3d_bwd.cpp.6DD8FAFEF9A89C97.idx b/.cache/clangd/index/test_avg_pool3d_bwd.cpp.6DD8FAFEF9A89C97.idx new file mode 100755 index 0000000000000000000000000000000000000000..2420f306fe7ee96210c531c3d639a22524472f62 Binary files /dev/null and b/.cache/clangd/index/test_avg_pool3d_bwd.cpp.6DD8FAFEF9A89C97.idx differ diff --git a/.cache/clangd/index/test_avg_pool3d_fwd.cpp.5274F49320881D42.idx b/.cache/clangd/index/test_avg_pool3d_fwd.cpp.5274F49320881D42.idx new file mode 100755 index 0000000000000000000000000000000000000000..d9f03ea0a12a3c5bface0cd94509cd34f5805c04 Binary files /dev/null and b/.cache/clangd/index/test_avg_pool3d_fwd.cpp.5274F49320881D42.idx differ diff --git a/.cache/clangd/index/test_batched_gemm.cpp.A022B37F45A32155.idx b/.cache/clangd/index/test_batched_gemm.cpp.A022B37F45A32155.idx new file mode 100755 index 0000000000000000000000000000000000000000..aab8245f5887c35ba01372f50f70a3e9d77f883b Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm.cpp.A022B37F45A32155.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp.52CCAAF6582C3CD0.idx b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp.52CCAAF6582C3CD0.idx new file mode 100755 index 0000000000000000000000000000000000000000..ab5b34b003d10dc3e54d40bbf41e8aaaba820284 Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp.52CCAAF6582C3CD0.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp.FB71D53BC1DE7E13.idx b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp.FB71D53BC1DE7E13.idx new file mode 100755 index 0000000000000000000000000000000000000000..1208380bcf6571e2109efec2b978ad1e6309aec4 Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp.FB71D53BC1DE7E13.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_util.hpp.38E1B00A8B9BAD43.idx b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_util.hpp.38E1B00A8B9BAD43.idx new file mode 100755 index 0000000000000000000000000000000000000000..2b57bd59bebf41fd0d06986a7d0a8ae2bf3fd4f4 Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_util.hpp.38E1B00A8B9BAD43.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_gemm_fp16.cpp.A1B1DD17614F30D3.idx b/.cache/clangd/index/test_batched_gemm_gemm_fp16.cpp.A1B1DD17614F30D3.idx new file mode 100755 index 0000000000000000000000000000000000000000..c72336ba242faf47b44e1f0b2d35f9938c49693a Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_gemm_fp16.cpp.A1B1DD17614F30D3.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_gemm_util.hpp.1FF4EC29A4661CFF.idx b/.cache/clangd/index/test_batched_gemm_gemm_util.hpp.1FF4EC29A4661CFF.idx new file mode 100755 index 0000000000000000000000000000000000000000..24635c22856b93a3961fa76b73ce65e2e756d0fa Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_gemm_util.hpp.1FF4EC29A4661CFF.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_softmax_gemm_fp16.cpp.0592033B8F1CD807.idx b/.cache/clangd/index/test_batched_gemm_softmax_gemm_fp16.cpp.0592033B8F1CD807.idx new file mode 100755 index 0000000000000000000000000000000000000000..f43f2575278135871115a10bd4f6145177cabc82 Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_softmax_gemm_fp16.cpp.0592033B8F1CD807.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_bf16.cpp.EDC4F40BC781A6DC.idx b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_bf16.cpp.EDC4F40BC781A6DC.idx new file mode 100755 index 0000000000000000000000000000000000000000..b455b4e79c7ec886c8cd99480cff86e5dbcb671b Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_bf16.cpp.EDC4F40BC781A6DC.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_fp16.cpp.0D1473C6C2BC0BFC.idx b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_fp16.cpp.0D1473C6C2BC0BFC.idx new file mode 100755 index 0000000000000000000000000000000000000000..a1a75f4b8b18a70ac31cbe561227bb5f4660aba3 Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_fp16.cpp.0D1473C6C2BC0BFC.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_util.hpp.DD433B78FEA57450.idx b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_util.hpp.DD433B78FEA57450.idx new file mode 100755 index 0000000000000000000000000000000000000000..1813740dea53131d2a1f602f50e5b8899964daae Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_util.hpp.DD433B78FEA57450.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_softmax_gemm_util.hpp.C46CFF8584087968.idx b/.cache/clangd/index/test_batched_gemm_softmax_gemm_util.hpp.C46CFF8584087968.idx new file mode 100755 index 0000000000000000000000000000000000000000..69bc64742ced2f6e28755a8e021a8d509905aff2 Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_softmax_gemm_util.hpp.C46CFF8584087968.idx differ diff --git a/.cache/clangd/index/test_bf8.cpp.16F0A495F47E95CC.idx b/.cache/clangd/index/test_bf8.cpp.16F0A495F47E95CC.idx new file mode 100755 index 0000000000000000000000000000000000000000..0682e3b1e6370ae893ee82bbea722e8b05e262cc Binary files /dev/null and b/.cache/clangd/index/test_bf8.cpp.16F0A495F47E95CC.idx differ diff --git a/.cache/clangd/index/test_block_to_ctile_map.cpp.D0968ED6B32C18D9.idx b/.cache/clangd/index/test_block_to_ctile_map.cpp.D0968ED6B32C18D9.idx new file mode 100755 index 0000000000000000000000000000000000000000..353428a23ac1548c5e5470639888423466ffaf52 Binary files /dev/null and b/.cache/clangd/index/test_block_to_ctile_map.cpp.D0968ED6B32C18D9.idx differ diff --git a/.cache/clangd/index/test_contraction.cpp.6A4280349E6B5DE6.idx b/.cache/clangd/index/test_contraction.cpp.6A4280349E6B5DE6.idx new file mode 100755 index 0000000000000000000000000000000000000000..256ef6d681e5842ec7e34c2bab383ee99a1bb41e Binary files /dev/null and b/.cache/clangd/index/test_contraction.cpp.6A4280349E6B5DE6.idx differ diff --git a/.cache/clangd/index/test_contraction_interface.cpp.51D57FFCD74AC94A.idx b/.cache/clangd/index/test_contraction_interface.cpp.51D57FFCD74AC94A.idx new file mode 100755 index 0000000000000000000000000000000000000000..0bd764ffd83c70f8a713c14b1b00aa3e7570ad22 Binary files /dev/null and b/.cache/clangd/index/test_contraction_interface.cpp.51D57FFCD74AC94A.idx differ diff --git a/.cache/clangd/index/test_conv_tensor_rearrange.cpp.69AF0D8F1794ACAD.idx b/.cache/clangd/index/test_conv_tensor_rearrange.cpp.69AF0D8F1794ACAD.idx new file mode 100755 index 0000000000000000000000000000000000000000..67659fc9758e4ac2b57ccaddf224ac31b0bcc2c4 Binary files /dev/null and b/.cache/clangd/index/test_conv_tensor_rearrange.cpp.69AF0D8F1794ACAD.idx differ diff --git a/.cache/clangd/index/test_conv_tensor_rearrange_interface.cpp.6D91D3DCA5B6D134.idx b/.cache/clangd/index/test_conv_tensor_rearrange_interface.cpp.6D91D3DCA5B6D134.idx new file mode 100755 index 0000000000000000000000000000000000000000..00cc6564d64404e013626c0cfb4f5a0d6708dab3 Binary files /dev/null and b/.cache/clangd/index/test_conv_tensor_rearrange_interface.cpp.6D91D3DCA5B6D134.idx differ diff --git a/.cache/clangd/index/test_elementwise_layernorm_fp16.cpp.6E7042446B0FE0D3.idx b/.cache/clangd/index/test_elementwise_layernorm_fp16.cpp.6E7042446B0FE0D3.idx new file mode 100755 index 0000000000000000000000000000000000000000..719f0893730c85e9d9c2bec76cb2fe93f96785db Binary files /dev/null and b/.cache/clangd/index/test_elementwise_layernorm_fp16.cpp.6E7042446B0FE0D3.idx differ diff --git a/.cache/clangd/index/test_fp8.cpp.AF44E860BEF4215C.idx b/.cache/clangd/index/test_fp8.cpp.AF44E860BEF4215C.idx new file mode 100755 index 0000000000000000000000000000000000000000..a927a367158e191a351b94f7a52fcf13588e2fb0 Binary files /dev/null and b/.cache/clangd/index/test_fp8.cpp.AF44E860BEF4215C.idx differ diff --git a/.cache/clangd/index/test_gemm_add.hpp.EFC09082310C599D.idx b/.cache/clangd/index/test_gemm_add.hpp.EFC09082310C599D.idx new file mode 100755 index 0000000000000000000000000000000000000000..8007f921e36b34f6ccb99b943326eeacea406cb2 Binary files /dev/null and b/.cache/clangd/index/test_gemm_add.hpp.EFC09082310C599D.idx differ diff --git a/.cache/clangd/index/test_gemm_add_fastgelu.cpp.9C90F62B27922BE9.idx b/.cache/clangd/index/test_gemm_add_fastgelu.cpp.9C90F62B27922BE9.idx new file mode 100755 index 0000000000000000000000000000000000000000..349660da6ab613341881743ac5f2b07db2659ffa Binary files /dev/null and b/.cache/clangd/index/test_gemm_add_fastgelu.cpp.9C90F62B27922BE9.idx differ diff --git a/.cache/clangd/index/test_gemm_add_relu.cpp.6A11991389EEE204.idx b/.cache/clangd/index/test_gemm_add_relu.cpp.6A11991389EEE204.idx new file mode 100755 index 0000000000000000000000000000000000000000..81a4022ce4dfcb0bccffdff2831b07494913afea Binary files /dev/null and b/.cache/clangd/index/test_gemm_add_relu.cpp.6A11991389EEE204.idx differ diff --git a/.cache/clangd/index/test_gemm_add_relu_add_layernorm_fp16.cpp.E9706C96D2E415C1.idx b/.cache/clangd/index/test_gemm_add_relu_add_layernorm_fp16.cpp.E9706C96D2E415C1.idx new file mode 100755 index 0000000000000000000000000000000000000000..5e92621d55ca400c37aaf208add2dcfb12c02069 Binary files /dev/null and b/.cache/clangd/index/test_gemm_add_relu_add_layernorm_fp16.cpp.E9706C96D2E415C1.idx differ diff --git a/.cache/clangd/index/test_gemm_add_silu.cpp.0C3DE18B907F5A14.idx b/.cache/clangd/index/test_gemm_add_silu.cpp.0C3DE18B907F5A14.idx new file mode 100755 index 0000000000000000000000000000000000000000..2b0530567e9416dd4f94550fbcb6e3d0ececef4c Binary files /dev/null and b/.cache/clangd/index/test_gemm_add_silu.cpp.0C3DE18B907F5A14.idx differ diff --git a/.cache/clangd/index/test_gemm_splitk.cpp.921AD8C68523E94F.idx b/.cache/clangd/index/test_gemm_splitk.cpp.921AD8C68523E94F.idx new file mode 100755 index 0000000000000000000000000000000000000000..dab26e89a6e4e378d195b21d5cf184e9f29929a3 Binary files /dev/null and b/.cache/clangd/index/test_gemm_splitk.cpp.921AD8C68523E94F.idx differ diff --git a/.cache/clangd/index/test_gemm_splitk_ut_cases.inc.CD038407CD6C080D.idx b/.cache/clangd/index/test_gemm_splitk_ut_cases.inc.CD038407CD6C080D.idx new file mode 100755 index 0000000000000000000000000000000000000000..6b4b30f5325025d1f1c1be0ed9d2a8d44c8b6e04 Binary files /dev/null and b/.cache/clangd/index/test_gemm_splitk_ut_cases.inc.CD038407CD6C080D.idx differ diff --git a/.cache/clangd/index/test_gemm_splitk_util.hpp.D5E2B28B24FEB85E.idx b/.cache/clangd/index/test_gemm_splitk_util.hpp.D5E2B28B24FEB85E.idx new file mode 100755 index 0000000000000000000000000000000000000000..6168bf6662508bf8f4890d5b3b8bbb92ea017ab2 Binary files /dev/null and b/.cache/clangd/index/test_gemm_splitk_util.hpp.D5E2B28B24FEB85E.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_bwd_data.cpp.BA5BCB136A7B5CF1.idx b/.cache/clangd/index/test_grouped_convnd_bwd_data.cpp.BA5BCB136A7B5CF1.idx new file mode 100755 index 0000000000000000000000000000000000000000..b99e72e84ad670963a902ae1b3f8744be53dc049 Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_bwd_data.cpp.BA5BCB136A7B5CF1.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_bwd_data_interface_xdl.cpp.8747321B4790F80F.idx b/.cache/clangd/index/test_grouped_convnd_bwd_data_interface_xdl.cpp.8747321B4790F80F.idx new file mode 100755 index 0000000000000000000000000000000000000000..668ec02b1c6c69288101ec3366d9c6ccc4d1e678 Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_bwd_data_interface_xdl.cpp.8747321B4790F80F.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_bwd_weight.cpp.1E0C24433031CA72.idx b/.cache/clangd/index/test_grouped_convnd_bwd_weight.cpp.1E0C24433031CA72.idx new file mode 100755 index 0000000000000000000000000000000000000000..ec2bf58c50f5a602adaf83570cb0798cf668195a Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_bwd_weight.cpp.1E0C24433031CA72.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_bwd_weight_interface_xdl.cpp.B93D201E166036D3.idx b/.cache/clangd/index/test_grouped_convnd_bwd_weight_interface_xdl.cpp.B93D201E166036D3.idx new file mode 100755 index 0000000000000000000000000000000000000000..453c299c2f83f82fc80e4f3fb399ae9d25ef6fe6 Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_bwd_weight_interface_xdl.cpp.B93D201E166036D3.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_fwd.cpp.5B423DD33073B028.idx b/.cache/clangd/index/test_grouped_convnd_fwd.cpp.5B423DD33073B028.idx new file mode 100755 index 0000000000000000000000000000000000000000..c8e54471bdd88720dc4768642eb4217bda60339b Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_fwd.cpp.5B423DD33073B028.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_fwd_multi_ab_interface.cpp.D44C4377C972C730.idx b/.cache/clangd/index/test_grouped_convnd_fwd_multi_ab_interface.cpp.D44C4377C972C730.idx new file mode 100755 index 0000000000000000000000000000000000000000..b39ff4518cb96db4c6d75ce7ac791e138a2b78f7 Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_fwd_multi_ab_interface.cpp.D44C4377C972C730.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_fwd_multi_d_interface_compatibility.cpp.D8EE1DD81FCEA622.idx b/.cache/clangd/index/test_grouped_convnd_fwd_multi_d_interface_compatibility.cpp.D8EE1DD81FCEA622.idx new file mode 100755 index 0000000000000000000000000000000000000000..621e701f41e288592d53fb4b136e2fdeec7a91e5 Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_fwd_multi_d_interface_compatibility.cpp.D8EE1DD81FCEA622.idx differ diff --git a/.cache/clangd/index/test_grouped_gemm_interface.cpp.BFD580574FDC4028.idx b/.cache/clangd/index/test_grouped_gemm_interface.cpp.BFD580574FDC4028.idx new file mode 100755 index 0000000000000000000000000000000000000000..5394c22e180a0478b2de59174b15f40645125ebd Binary files /dev/null and b/.cache/clangd/index/test_grouped_gemm_interface.cpp.BFD580574FDC4028.idx differ diff --git a/.cache/clangd/index/test_grouped_gemm_splitk.cpp.1985BB1B09859227.idx b/.cache/clangd/index/test_grouped_gemm_splitk.cpp.1985BB1B09859227.idx new file mode 100755 index 0000000000000000000000000000000000000000..5b58186ef44040e5ba8acaf3b5f7c629bfc3153d Binary files /dev/null and b/.cache/clangd/index/test_grouped_gemm_splitk.cpp.1985BB1B09859227.idx differ diff --git a/.cache/clangd/index/test_grouped_gemm_ut_cases.inc.B622DD65303F1D04.idx b/.cache/clangd/index/test_grouped_gemm_ut_cases.inc.B622DD65303F1D04.idx new file mode 100755 index 0000000000000000000000000000000000000000..8001fb3df3f16921de31a2ebc3fb94145acdac93 Binary files /dev/null and b/.cache/clangd/index/test_grouped_gemm_ut_cases.inc.B622DD65303F1D04.idx differ diff --git a/.cache/clangd/index/test_grouped_gemm_util.hpp.62D7D32AEF140979.idx b/.cache/clangd/index/test_grouped_gemm_util.hpp.62D7D32AEF140979.idx new file mode 100755 index 0000000000000000000000000000000000000000..976f0d875565df1e3401e5ba5a90c147ebafe2d7 Binary files /dev/null and b/.cache/clangd/index/test_grouped_gemm_util.hpp.62D7D32AEF140979.idx differ diff --git a/.cache/clangd/index/test_groupnorm_bwd_data_fp32.cpp.EF83656CE5FBCD24.idx b/.cache/clangd/index/test_groupnorm_bwd_data_fp32.cpp.EF83656CE5FBCD24.idx new file mode 100755 index 0000000000000000000000000000000000000000..87519167b0e027ee73a495bae8813a9ea4cede12 Binary files /dev/null and b/.cache/clangd/index/test_groupnorm_bwd_data_fp32.cpp.EF83656CE5FBCD24.idx differ diff --git a/.cache/clangd/index/test_groupnorm_bwd_gamma_beta_fp32.cpp.266D9654B5E1C67F.idx b/.cache/clangd/index/test_groupnorm_bwd_gamma_beta_fp32.cpp.266D9654B5E1C67F.idx new file mode 100755 index 0000000000000000000000000000000000000000..a64ab64d811c58914cfb356ecc5336fb1d64f884 Binary files /dev/null and b/.cache/clangd/index/test_groupnorm_bwd_gamma_beta_fp32.cpp.266D9654B5E1C67F.idx differ diff --git a/.cache/clangd/index/test_groupnorm_fwd_fp16.cpp.E01BDD088F0473FE.idx b/.cache/clangd/index/test_groupnorm_fwd_fp16.cpp.E01BDD088F0473FE.idx new file mode 100755 index 0000000000000000000000000000000000000000..15fac964c6ecbf12fe850b6403ac8b1dce6a96ac Binary files /dev/null and b/.cache/clangd/index/test_groupnorm_fwd_fp16.cpp.E01BDD088F0473FE.idx differ diff --git a/.cache/clangd/index/test_groupnorm_fwd_fp32.cpp.06336278490A0D06.idx b/.cache/clangd/index/test_groupnorm_fwd_fp32.cpp.06336278490A0D06.idx new file mode 100755 index 0000000000000000000000000000000000000000..63ca52661e0649e3ca8681760cd775526ff3daa3 Binary files /dev/null and b/.cache/clangd/index/test_groupnorm_fwd_fp32.cpp.06336278490A0D06.idx differ diff --git a/.cache/clangd/index/test_layernorm2d_bwd_data_fp32.cpp.65C39944BBD4A16D.idx b/.cache/clangd/index/test_layernorm2d_bwd_data_fp32.cpp.65C39944BBD4A16D.idx new file mode 100755 index 0000000000000000000000000000000000000000..d6adb6e5cc79079cb8894c1baa47271233836b42 Binary files /dev/null and b/.cache/clangd/index/test_layernorm2d_bwd_data_fp32.cpp.65C39944BBD4A16D.idx differ diff --git a/.cache/clangd/index/test_layernorm2d_bwd_gamma_beta_fp32.cpp.B3B6429EB0BBF1C4.idx b/.cache/clangd/index/test_layernorm2d_bwd_gamma_beta_fp32.cpp.B3B6429EB0BBF1C4.idx new file mode 100755 index 0000000000000000000000000000000000000000..77f0325b797a1a14a578a6d502ba8ab62f22d97c Binary files /dev/null and b/.cache/clangd/index/test_layernorm2d_bwd_gamma_beta_fp32.cpp.B3B6429EB0BBF1C4.idx differ diff --git a/.cache/clangd/index/test_layernorm2d_fwd_fp16.cpp.49CB42570B950D66.idx b/.cache/clangd/index/test_layernorm2d_fwd_fp16.cpp.49CB42570B950D66.idx new file mode 100755 index 0000000000000000000000000000000000000000..dc2d7531a9572bd8489abb20bff63cfd17d05d48 Binary files /dev/null and b/.cache/clangd/index/test_layernorm2d_fwd_fp16.cpp.49CB42570B950D66.idx differ diff --git a/.cache/clangd/index/test_layernorm2d_fwd_fp32.cpp.57C9823195A1B889.idx b/.cache/clangd/index/test_layernorm2d_fwd_fp32.cpp.57C9823195A1B889.idx new file mode 100755 index 0000000000000000000000000000000000000000..bde3c4a0ed509cb6a29775964b4a2eb9296bf62c Binary files /dev/null and b/.cache/clangd/index/test_layernorm2d_fwd_fp32.cpp.57C9823195A1B889.idx differ diff --git a/.cache/clangd/index/test_layernorm4d_fwd_fp16.cpp.4CF1AE7F957B873D.idx b/.cache/clangd/index/test_layernorm4d_fwd_fp16.cpp.4CF1AE7F957B873D.idx new file mode 100755 index 0000000000000000000000000000000000000000..027de8bca2856845dcb60a026c4f3914337747b4 Binary files /dev/null and b/.cache/clangd/index/test_layernorm4d_fwd_fp16.cpp.4CF1AE7F957B873D.idx differ diff --git a/.cache/clangd/index/test_max_pool3d_bwd.cpp.AF800737CBB45892.idx b/.cache/clangd/index/test_max_pool3d_bwd.cpp.AF800737CBB45892.idx new file mode 100755 index 0000000000000000000000000000000000000000..791cfa96998fe321a40290b2936a7908bd916827 Binary files /dev/null and b/.cache/clangd/index/test_max_pool3d_bwd.cpp.AF800737CBB45892.idx differ diff --git a/.cache/clangd/index/test_max_pool3d_fwd.cpp.BE8A2BFD19420616.idx b/.cache/clangd/index/test_max_pool3d_fwd.cpp.BE8A2BFD19420616.idx new file mode 100755 index 0000000000000000000000000000000000000000..abc44f4fb7ab6fc7d188046377edab21fa0cf200 Binary files /dev/null and b/.cache/clangd/index/test_max_pool3d_fwd.cpp.BE8A2BFD19420616.idx differ diff --git a/.cache/clangd/index/test_permute_scale.cpp.0607B34D8846C932.idx b/.cache/clangd/index/test_permute_scale.cpp.0607B34D8846C932.idx new file mode 100755 index 0000000000000000000000000000000000000000..efaa9296eaed4d18bd03183f36c04a935ab7afe4 Binary files /dev/null and b/.cache/clangd/index/test_permute_scale.cpp.0607B34D8846C932.idx differ diff --git a/.cache/clangd/index/test_pool_fwd_common.hpp.CD53AEA46B3FF1EB.idx b/.cache/clangd/index/test_pool_fwd_common.hpp.CD53AEA46B3FF1EB.idx new file mode 100755 index 0000000000000000000000000000000000000000..1212340dedd8626ed85049bfa0ddd06ad68a6532 Binary files /dev/null and b/.cache/clangd/index/test_pool_fwd_common.hpp.CD53AEA46B3FF1EB.idx differ diff --git a/.cache/clangd/index/test_softmax_interface.cpp.9F0A118BAF3CB91E.idx b/.cache/clangd/index/test_softmax_interface.cpp.9F0A118BAF3CB91E.idx new file mode 100755 index 0000000000000000000000000000000000000000..ae6630a48e228f48ebf7cb38599549f2655445da Binary files /dev/null and b/.cache/clangd/index/test_softmax_interface.cpp.9F0A118BAF3CB91E.idx differ diff --git a/.cache/clangd/index/test_softmax_rank3.cpp.F74F60A3C98E1A4C.idx b/.cache/clangd/index/test_softmax_rank3.cpp.F74F60A3C98E1A4C.idx new file mode 100755 index 0000000000000000000000000000000000000000..3f4241702e553a456058dc94f98734dec75bba0b Binary files /dev/null and b/.cache/clangd/index/test_softmax_rank3.cpp.F74F60A3C98E1A4C.idx differ diff --git a/.cache/clangd/index/test_softmax_rank4.cpp.05FB4DA05EDB8025.idx b/.cache/clangd/index/test_softmax_rank4.cpp.05FB4DA05EDB8025.idx new file mode 100755 index 0000000000000000000000000000000000000000..87c8bd25709b13e712d33e72a84b696b17ee7717 Binary files /dev/null and b/.cache/clangd/index/test_softmax_rank4.cpp.05FB4DA05EDB8025.idx differ diff --git a/.cache/clangd/index/test_softmax_ut_cases.inc.A86C258A9B038466.idx b/.cache/clangd/index/test_softmax_ut_cases.inc.A86C258A9B038466.idx new file mode 100755 index 0000000000000000000000000000000000000000..eae4e8b7f8a8cb131694c496394cc386da5a437e Binary files /dev/null and b/.cache/clangd/index/test_softmax_ut_cases.inc.A86C258A9B038466.idx differ diff --git a/.cache/clangd/index/test_softmax_util.hpp.A85F4CCFF59B9D2C.idx b/.cache/clangd/index/test_softmax_util.hpp.A85F4CCFF59B9D2C.idx new file mode 100755 index 0000000000000000000000000000000000000000..d2126fc80a7f12e7208f32092d4c2b9c84734378 Binary files /dev/null and b/.cache/clangd/index/test_softmax_util.hpp.A85F4CCFF59B9D2C.idx differ diff --git a/.cache/clangd/index/test_transpose.cpp.EA23A250A71A4476.idx b/.cache/clangd/index/test_transpose.cpp.EA23A250A71A4476.idx new file mode 100755 index 0000000000000000000000000000000000000000..26fb935e79494c451449f75da4cc4af07a193773 Binary files /dev/null and b/.cache/clangd/index/test_transpose.cpp.EA23A250A71A4476.idx differ diff --git a/.cache/clangd/index/test_wrapper_copy.cpp.732CF6ED569867C1.idx b/.cache/clangd/index/test_wrapper_copy.cpp.732CF6ED569867C1.idx new file mode 100755 index 0000000000000000000000000000000000000000..b0822c47c858bac582e075891ea93ecbc03f3b88 Binary files /dev/null and b/.cache/clangd/index/test_wrapper_copy.cpp.732CF6ED569867C1.idx differ diff --git a/.cache/clangd/index/test_wrapper_gemm.cpp.74B7113BB3F74220.idx b/.cache/clangd/index/test_wrapper_gemm.cpp.74B7113BB3F74220.idx new file mode 100755 index 0000000000000000000000000000000000000000..888a4a2a8a4b91e9cf554bc0e7b58e529ec9d7c2 Binary files /dev/null and b/.cache/clangd/index/test_wrapper_gemm.cpp.74B7113BB3F74220.idx differ diff --git a/.cache/clangd/index/test_wrapper_layout.cpp.6A8EFC4867A12799.idx b/.cache/clangd/index/test_wrapper_layout.cpp.6A8EFC4867A12799.idx new file mode 100755 index 0000000000000000000000000000000000000000..49228f9cc3e489494c5c5c8ff3449b22f19f167e Binary files /dev/null and b/.cache/clangd/index/test_wrapper_layout.cpp.6A8EFC4867A12799.idx differ diff --git a/.cache/clangd/index/test_wrapper_partition.cpp.EDFCA4C2C7218226.idx b/.cache/clangd/index/test_wrapper_partition.cpp.EDFCA4C2C7218226.idx new file mode 100755 index 0000000000000000000000000000000000000000..0e37ec3b0c085d59e48334ee502e8dcfe9065bad Binary files /dev/null and b/.cache/clangd/index/test_wrapper_partition.cpp.EDFCA4C2C7218226.idx differ diff --git a/.cache/clangd/index/test_wrapper_tensor.cpp.A4AB6B2A22D4689E.idx b/.cache/clangd/index/test_wrapper_tensor.cpp.A4AB6B2A22D4689E.idx new file mode 100755 index 0000000000000000000000000000000000000000..4577e6c7301da64d80ab6af2962478a043d4ab47 Binary files /dev/null and b/.cache/clangd/index/test_wrapper_tensor.cpp.A4AB6B2A22D4689E.idx differ diff --git a/.cache/clangd/index/thread_group.hpp.568EEF4CF01C2640.idx b/.cache/clangd/index/thread_group.hpp.568EEF4CF01C2640.idx new file mode 100755 index 0000000000000000000000000000000000000000..f86d2531a94fae2458e103b3687e2e54c5fba651 Binary files /dev/null and b/.cache/clangd/index/thread_group.hpp.568EEF4CF01C2640.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_direct_load.hpp.5F79EA1E492991C0.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_direct_load.hpp.5F79EA1E492991C0.idx new file mode 100755 index 0000000000000000000000000000000000000000..7a45d33ad545cb5fa7cc44d5356da86b8a7c7577 Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_direct_load.hpp.5F79EA1E492991C0.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v4r1.hpp.CC30E09EE6FAA0EA.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v4r1.hpp.CC30E09EE6FAA0EA.idx new file mode 100755 index 0000000000000000000000000000000000000000..f4738a483ea5463f95e028212cb037ab111f043f Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v4r1.hpp.CC30E09EE6FAA0EA.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1.hpp.F8F9EEB58869763C.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1.hpp.F8F9EEB58869763C.idx new file mode 100755 index 0000000000000000000000000000000000000000..5ef0ff2ddb61953e2549ad82ae694bef0dfa8ef9 Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1.hpp.F8F9EEB58869763C.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1r2.hpp.34BED592A25E7D4A.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1r2.hpp.34BED592A25E7D4A.idx new file mode 100755 index 0000000000000000000000000000000000000000..97ce4e83639138f50e714f50e60dbdcbb7badffc Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1r2.hpp.34BED592A25E7D4A.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r2.hpp.B8541658FFD968DD.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r2.hpp.B8541658FFD968DD.idx new file mode 100755 index 0000000000000000000000000000000000000000..b6bea96007c45be989b2d6ba1029f20f7fbc4211 Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r2.hpp.B8541658FFD968DD.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r3.hpp.F9C1F18AD045D6D7.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r3.hpp.F9C1F18AD045D6D7.idx new file mode 100755 index 0000000000000000000000000000000000000000..19e6e5742fe619282de6fc791725986d9ec06a88 Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r3.hpp.F9C1F18AD045D6D7.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v7.hpp.85ADAEC27150419B.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v7.hpp.85ADAEC27150419B.idx new file mode 100755 index 0000000000000000000000000000000000000000..0fc4218a34ce48a27f06718c75074e84069c18ee Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v7.hpp.85ADAEC27150419B.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v7r2.hpp.E55C206DD2AD1B19.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v7r2.hpp.E55C206DD2AD1B19.idx new file mode 100755 index 0000000000000000000000000000000000000000..5176faff8e49be827be63f8871d18c6fbe45eb70 Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v7r2.hpp.E55C206DD2AD1B19.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer.hpp.CE773F216A04AFAA.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer.hpp.CE773F216A04AFAA.idx new file mode 100755 index 0000000000000000000000000000000000000000..501c836879fcedd805e71a58bbc3b0ef695e0b15 Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer.hpp.CE773F216A04AFAA.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v3r1.hpp.2D0C9495DC723ACC.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v3r1.hpp.2D0C9495DC723ACC.idx new file mode 100755 index 0000000000000000000000000000000000000000..c76e2c27095379a743a1cdfb17f25937f988483b Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v3r1.hpp.2D0C9495DC723ACC.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v4r1.hpp.42FD0DF31A3BFDC2.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v4r1.hpp.42FD0DF31A3BFDC2.idx new file mode 100755 index 0000000000000000000000000000000000000000..3c798531443192eae8e39f1707e83a3a1509a29a Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v4r1.hpp.42FD0DF31A3BFDC2.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1.hpp.7AB8A56ED737D765.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1.hpp.7AB8A56ED737D765.idx new file mode 100755 index 0000000000000000000000000000000000000000..01fe10d83a1853daaab2ad56bcf35519660f1114 Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1.hpp.7AB8A56ED737D765.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1r2.hpp.AD9623F82BA99EFC.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1r2.hpp.AD9623F82BA99EFC.idx new file mode 100755 index 0000000000000000000000000000000000000000..952c3bdd4237a7e1e4312fc747c3b796f50c3197 Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1r2.hpp.AD9623F82BA99EFC.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r2.hpp.D28171A7A44ECC24.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r2.hpp.D28171A7A44ECC24.idx new file mode 100755 index 0000000000000000000000000000000000000000..e9dc97dc49a19e27e83e50d29cc91cd93f90e4d2 Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r2.hpp.D28171A7A44ECC24.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r3.hpp.C25C8A66C4CF26F7.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r3.hpp.C25C8A66C4CF26F7.idx new file mode 100755 index 0000000000000000000000000000000000000000..9300b0337c3440ca2864219478eea864a2e595db Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r3.hpp.C25C8A66C4CF26F7.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v7.hpp.5C98C34294C833A3.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v7.hpp.5C98C34294C833A3.idx new file mode 100755 index 0000000000000000000000000000000000000000..cc94facf536b0622d50f7f28234be291faf64ff4 Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v7.hpp.5C98C34294C833A3.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v7r2.hpp.891A886C76B49727.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v7r2.hpp.891A886C76B49727.idx new file mode 100755 index 0000000000000000000000000000000000000000..20fa3b763d819e1a54e7aa9ef94762674a7b5eff Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v7r2.hpp.891A886C76B49727.idx differ diff --git a/.cache/clangd/index/threadwise_welford.hpp.CCCD2666E9AE4B4E.idx b/.cache/clangd/index/threadwise_welford.hpp.CCCD2666E9AE4B4E.idx new file mode 100755 index 0000000000000000000000000000000000000000..9c1f309e8ae278e861c2623a5f70e78b10f954e4 Binary files /dev/null and b/.cache/clangd/index/threadwise_welford.hpp.CCCD2666E9AE4B4E.idx differ diff --git a/.cache/clangd/index/transform_contraction_to_gemm.hpp.51AB58D6126FEA84.idx b/.cache/clangd/index/transform_contraction_to_gemm.hpp.51AB58D6126FEA84.idx new file mode 100755 index 0000000000000000000000000000000000000000..08c0cb41becfdca291b34c20d3c9034bfb363813 Binary files /dev/null and b/.cache/clangd/index/transform_contraction_to_gemm.hpp.51AB58D6126FEA84.idx differ diff --git a/.cache/clangd/index/transform_conv_bwd_data_to_gemm_v1.hpp.341C45E4BB62B32F.idx b/.cache/clangd/index/transform_conv_bwd_data_to_gemm_v1.hpp.341C45E4BB62B32F.idx new file mode 100755 index 0000000000000000000000000000000000000000..d41b4c9578ccadc88eb8f77de8edd4c91d75fc12 Binary files /dev/null and b/.cache/clangd/index/transform_conv_bwd_data_to_gemm_v1.hpp.341C45E4BB62B32F.idx differ diff --git a/.cache/clangd/index/transform_conv_fwd_to_gemm.hpp.14B4756DD59EEE91.idx b/.cache/clangd/index/transform_conv_fwd_to_gemm.hpp.14B4756DD59EEE91.idx new file mode 100755 index 0000000000000000000000000000000000000000..73d34c5cdd7da53a4f0109995086b80757a5f944 Binary files /dev/null and b/.cache/clangd/index/transform_conv_fwd_to_gemm.hpp.14B4756DD59EEE91.idx differ diff --git a/.cache/clangd/index/transpose_3d.hpp.F3212DEAAECABFBD.idx b/.cache/clangd/index/transpose_3d.hpp.F3212DEAAECABFBD.idx new file mode 100755 index 0000000000000000000000000000000000000000..0178c3a083d5a044d20e88a9a878debda8dc37a3 Binary files /dev/null and b/.cache/clangd/index/transpose_3d.hpp.F3212DEAAECABFBD.idx differ diff --git a/.cache/clangd/index/transpose_vectors.hpp.7945D9ABB9DC55BD.idx b/.cache/clangd/index/transpose_vectors.hpp.7945D9ABB9DC55BD.idx new file mode 100755 index 0000000000000000000000000000000000000000..41e7f1503951e0cecdd0f11384bcf22f620febf9 Binary files /dev/null and b/.cache/clangd/index/transpose_vectors.hpp.7945D9ABB9DC55BD.idx differ diff --git a/.cache/clangd/index/tuple.hpp.3819E4E0F87690B8.idx b/.cache/clangd/index/tuple.hpp.3819E4E0F87690B8.idx new file mode 100755 index 0000000000000000000000000000000000000000..cfed3c0a380571aed5bd84b4b68a86bb821326ee Binary files /dev/null and b/.cache/clangd/index/tuple.hpp.3819E4E0F87690B8.idx differ diff --git a/.cache/clangd/index/tuple_helper.hpp.73EC15D672B6492A.idx b/.cache/clangd/index/tuple_helper.hpp.73EC15D672B6492A.idx new file mode 100755 index 0000000000000000000000000000000000000000..cecd253d159c4566b62e702fed43f561c2d3b553 Binary files /dev/null and b/.cache/clangd/index/tuple_helper.hpp.73EC15D672B6492A.idx differ diff --git a/.cache/clangd/index/type.hpp.3093CAE9B6EE9081.idx b/.cache/clangd/index/type.hpp.3093CAE9B6EE9081.idx new file mode 100755 index 0000000000000000000000000000000000000000..43c0f57d6eaf1658027b3d9b32902ebf5a2bdbed Binary files /dev/null and b/.cache/clangd/index/type.hpp.3093CAE9B6EE9081.idx differ diff --git a/.cache/clangd/index/type_convert.hpp.5842DA15997DD61D.idx b/.cache/clangd/index/type_convert.hpp.5842DA15997DD61D.idx new file mode 100755 index 0000000000000000000000000000000000000000..769821a378a182754738589f19e214d14171b189 Binary files /dev/null and b/.cache/clangd/index/type_convert.hpp.5842DA15997DD61D.idx differ diff --git a/.cache/clangd/index/type_convert_const.cpp.46FAD4095BF1793B.idx b/.cache/clangd/index/type_convert_const.cpp.46FAD4095BF1793B.idx new file mode 100755 index 0000000000000000000000000000000000000000..21ac067ba2ea0b23507b3290add6854925735aaa Binary files /dev/null and b/.cache/clangd/index/type_convert_const.cpp.46FAD4095BF1793B.idx differ diff --git a/.cache/clangd/index/unary_element_wise_operation.hpp.DD0C457D046F7875.idx b/.cache/clangd/index/unary_element_wise_operation.hpp.DD0C457D046F7875.idx new file mode 100755 index 0000000000000000000000000000000000000000..5e11cde6f128ebd1826506d86dc3d3da12f4bd50 Binary files /dev/null and b/.cache/clangd/index/unary_element_wise_operation.hpp.DD0C457D046F7875.idx differ diff --git a/.cache/clangd/index/welford_helper.hpp.DC75228A25D5F78E.idx b/.cache/clangd/index/welford_helper.hpp.DC75228A25D5F78E.idx new file mode 100755 index 0000000000000000000000000000000000000000..7c9d29fa4e245b5aa5f97b11ae376bc43f661065 Binary files /dev/null and b/.cache/clangd/index/welford_helper.hpp.DC75228A25D5F78E.idx differ diff --git a/.cache/clangd/index/wmma_gemm.hpp.260BC1EC1DC7109A.idx b/.cache/clangd/index/wmma_gemm.hpp.260BC1EC1DC7109A.idx new file mode 100755 index 0000000000000000000000000000000000000000..dc97c586c708b18f6f8b7f6d6e8acb40a17da600 Binary files /dev/null and b/.cache/clangd/index/wmma_gemm.hpp.260BC1EC1DC7109A.idx differ diff --git a/.cache/clangd/index/workgroup_barrier.hpp.E88FB4AF7CFEB4E5.idx b/.cache/clangd/index/workgroup_barrier.hpp.E88FB4AF7CFEB4E5.idx new file mode 100755 index 0000000000000000000000000000000000000000..f5902d559933b65554d7ef33138152e81ae58dc9 Binary files /dev/null and b/.cache/clangd/index/workgroup_barrier.hpp.E88FB4AF7CFEB4E5.idx differ diff --git a/.cache/clangd/index/workgroup_synchronization.hpp.EE3FBA63C405EAA9.idx b/.cache/clangd/index/workgroup_synchronization.hpp.EE3FBA63C405EAA9.idx new file mode 100755 index 0000000000000000000000000000000000000000..39f30da4f402c88a90ac2eaf4bf7bec5f92a3023 Binary files /dev/null and b/.cache/clangd/index/workgroup_synchronization.hpp.EE3FBA63C405EAA9.idx differ diff --git a/.cache/clangd/index/xdlops_gemm.hpp.C8FD7C44D6227313.idx b/.cache/clangd/index/xdlops_gemm.hpp.C8FD7C44D6227313.idx new file mode 100755 index 0000000000000000000000000000000000000000..3a523c7a8a98de000851f428d21121b14eb79107 Binary files /dev/null and b/.cache/clangd/index/xdlops_gemm.hpp.C8FD7C44D6227313.idx differ diff --git a/.clang-format b/.clang-format old mode 100644 new mode 100755 diff --git a/.clang-tidy b/.clang-tidy old mode 100644 new mode 100755 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS old mode 100644 new mode 100755 diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml old mode 100644 new mode 100755 diff --git a/.github/ISSUE_TEMPLATE/issue_report.yml b/.github/ISSUE_TEMPLATE/issue_report.yml old mode 100644 new mode 100755 diff --git a/.github/dependabot.yml b/.github/dependabot.yml old mode 100644 new mode 100755 diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml old mode 100644 new mode 100755 diff --git a/.readthedocs.yaml b/.readthedocs.yaml old mode 100644 new mode 100755 diff --git a/CHANGELOG.md b/CHANGELOG.md old mode 100644 new mode 100755 diff --git a/CITATION.cff b/CITATION.cff old mode 100644 new mode 100755 diff --git a/CMakeLists.txt b/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md old mode 100644 new mode 100755 diff --git a/Config.cmake.in b/Config.cmake.in old mode 100644 new mode 100755 diff --git a/Dockerfile b/Dockerfile old mode 100644 new mode 100755 diff --git a/Jenkinsfile b/Jenkinsfile old mode 100644 new mode 100755 diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/client_example/01_gemm/CMakeLists.txt b/client_example/01_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/01_gemm/gemm.cpp b/client_example/01_gemm/gemm.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt b/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp old mode 100644 new mode 100755 diff --git a/client_example/03_gemm_layernorm/CMakeLists.txt b/client_example/03_gemm_layernorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp b/client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp old mode 100644 new mode 100755 diff --git a/client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp b/client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/CMakeLists.txt b/client_example/04_contraction/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/contraction_bilinear_fp32.cpp b/client_example/04_contraction/contraction_bilinear_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/contraction_bilinear_fp64.cpp b/client_example/04_contraction/contraction_bilinear_fp64.cpp old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp b/client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/contraction_scale_fp32.cpp b/client_example/04_contraction/contraction_scale_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/contraction_scale_fp64.cpp b/client_example/04_contraction/contraction_scale_fp64.cpp old mode 100644 new mode 100755 diff --git a/client_example/05_layernorm/CMakeLists.txt b/client_example/05_layernorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/05_layernorm/layernorm2d_bwd_data.cpp b/client_example/05_layernorm/layernorm2d_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp b/client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp old mode 100644 new mode 100755 diff --git a/client_example/05_layernorm/layernorm2d_fwd.cpp b/client_example/05_layernorm/layernorm2d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/05_layernorm/layernorm4d_fwd.cpp b/client_example/05_layernorm/layernorm4d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/06_softmax/CMakeLists.txt b/client_example/06_softmax/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/06_softmax/softmax4d.cpp b/client_example/06_softmax/softmax4d.cpp old mode 100644 new mode 100755 diff --git a/client_example/07_grouped_convnd_fwd/CMakeLists.txt b/client_example/07_grouped_convnd_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp b/client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp b/client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/08_fused_attention/CMakeLists.txt b/client_example/08_fused_attention/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/08_fused_attention/fused_attention.cpp b/client_example/08_fused_attention/fused_attention.cpp old mode 100644 new mode 100755 diff --git a/client_example/08_fused_attention/fused_attention_bias.cpp b/client_example/08_fused_attention/fused_attention_bias.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/CMakeLists.txt b/client_example/09_quantization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/gemm_quantization.cpp b/client_example/09_quantization/gemm_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt b/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt b/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/common.hpp b/client_example/11_grouped_conv_bwd_weight/common.hpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/12_elementwise_normalization/CMakeLists.txt b/client_example/12_elementwise_normalization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp b/client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp old mode 100644 new mode 100755 diff --git a/client_example/13_batchnorm/CMakeLists.txt b/client_example/13_batchnorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp b/client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp old mode 100644 new mode 100755 diff --git a/client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp b/client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp old mode 100644 new mode 100755 diff --git a/client_example/13_batchnorm/batchnorm_infer_nhwc.cpp b/client_example/13_batchnorm/batchnorm_infer_nhwc.cpp old mode 100644 new mode 100755 diff --git a/client_example/14_instance_id/CMakeLists.txt b/client_example/14_instance_id/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/14_instance_id/batchnorm_fwd_instance_id.cpp b/client_example/14_instance_id/batchnorm_fwd_instance_id.cpp old mode 100644 new mode 100755 diff --git a/client_example/15_convnd_bwd_data/CMakeLists.txt b/client_example/15_convnd_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/15_convnd_bwd_data/common.hpp b/client_example/15_convnd_bwd_data/common.hpp old mode 100644 new mode 100755 diff --git a/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp b/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp b/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/15_gemm_add_multiply/CMakeLists.txt b/client_example/15_gemm_add_multiply/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/15_gemm_add_multiply/gemm_add_multiply.cpp b/client_example/15_gemm_add_multiply/gemm_add_multiply.cpp old mode 100644 new mode 100755 diff --git a/client_example/15_reduce/CMakeLists.txt b/client_example/15_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/15_reduce/reduce_nhwc_c.cpp b/client_example/15_reduce/reduce_nhwc_c.cpp old mode 100644 new mode 100755 diff --git a/client_example/16_convnd_fwd/CMakeLists.txt b/client_example/16_convnd_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/16_convnd_fwd/common.hpp b/client_example/16_convnd_fwd/common.hpp old mode 100644 new mode 100755 diff --git a/client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp old mode 100644 new mode 100755 diff --git a/client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt b/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp b/client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/client_example/18_groupnorm/CMakeLists.txt b/client_example/18_groupnorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/18_groupnorm/groupnorm_bwd_data.cpp b/client_example/18_groupnorm/groupnorm_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/client_example/18_groupnorm/groupnorm_bwd_gamma_beta.cpp b/client_example/18_groupnorm/groupnorm_bwd_gamma_beta.cpp old mode 100644 new mode 100755 diff --git a/client_example/18_groupnorm/groupnorm_swish_fwd.cpp b/client_example/18_groupnorm/groupnorm_swish_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/19_pool/CMakeLists.txt b/client_example/19_pool/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/19_pool/avg_pool3d_bwd.cpp b/client_example/19_pool/avg_pool3d_bwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/19_pool/avg_pool3d_fwd.cpp b/client_example/19_pool/avg_pool3d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/19_pool/max_pool2d_bwd.cpp b/client_example/19_pool/max_pool2d_bwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/19_pool/max_pool2d_fwd.cpp b/client_example/19_pool/max_pool2d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/20_splitk_gemm/CMakeLists.txt b/client_example/20_splitk_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp b/client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp old mode 100644 new mode 100755 diff --git a/client_example/21_grouped_gemm_bias/CMakeLists.txt b/client_example/21_grouped_gemm_bias/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp b/client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_grouped_gemm/CMakeLists.txt b/client_example/22_grouped_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_bf16.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_bf16.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_im2col_col2im/CMakeLists.txt b/client_example/22_im2col_col2im/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/22_im2col_col2im/column_to_image.cpp b/client_example/22_im2col_col2im/column_to_image.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_im2col_col2im/image_to_column.cpp b/client_example/22_im2col_col2im/image_to_column.cpp old mode 100644 new mode 100755 diff --git a/client_example/23_elementwise_transpose/CMakeLists.txt b/client_example/23_elementwise_transpose/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/23_elementwise_transpose/elementwise_transpose_3d.cpp b/client_example/23_elementwise_transpose/elementwise_transpose_3d.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/CMakeLists.txt b/client_example/24_grouped_conv_activation/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_bwd_data_bilinear/grouped_conv_bwd_data_bilinear_residual_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_bwd_data_bilinear/grouped_conv_bwd_data_bilinear_residual_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_bwd_data_scale/grouped_conv_bwd_data_scale_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_bwd_data_scale/grouped_conv_bwd_data_scale_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_bilinear/grouped_conv_fwd_bilinear_residual_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_bilinear/grouped_conv_fwd_bilinear_residual_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scale/grouped_conv_fwd_scale_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scale/grouped_conv_fwd_scale_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab.inc b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab.inc old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_bf16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_bf16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp32.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_int8.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_int8.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu.inc b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu.inc old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_bf16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_bf16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp32.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_int8.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_int8.cpp old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/CMakeLists.txt b/client_example/25_wrapper/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/README.md b/client_example/25_wrapper/README.md old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/tensor_transform_using_wrapper.cpp b/client_example/25_wrapper/tensor_transform_using_wrapper.cpp old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/wrapper_basic_gemm.cpp b/client_example/25_wrapper/wrapper_basic_gemm.cpp old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/wrapper_img2col.cpp b/client_example/25_wrapper/wrapper_img2col.cpp old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/wrapper_optimized_gemm.cpp b/client_example/25_wrapper/wrapper_optimized_gemm.cpp old mode 100644 new mode 100755 diff --git a/client_example/CMakeLists.txt b/client_example/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/README.md b/client_example/README.md old mode 100644 new mode 100755 diff --git a/cmake/Analyzers.cmake b/cmake/Analyzers.cmake old mode 100644 new mode 100755 diff --git a/cmake/ClangTidy.cmake b/cmake/ClangTidy.cmake old mode 100644 new mode 100755 diff --git a/cmake/CppCheck.cmake b/cmake/CppCheck.cmake old mode 100644 new mode 100755 diff --git a/cmake/DoxygenDoc.cmake b/cmake/DoxygenDoc.cmake old mode 100644 new mode 100755 diff --git a/cmake/Embed.cmake b/cmake/Embed.cmake old mode 100644 new mode 100755 diff --git a/cmake/EnableCompilerWarnings.cmake b/cmake/EnableCompilerWarnings.cmake old mode 100644 new mode 100755 diff --git a/cmake/TargetFlags.cmake b/cmake/TargetFlags.cmake old mode 100644 new mode 100755 diff --git a/cmake/getopt.cmake b/cmake/getopt.cmake old mode 100644 new mode 100755 diff --git a/cmake/gtest.cmake b/cmake/gtest.cmake old mode 100644 new mode 100755 diff --git a/codegen/CMakeLists.txt b/codegen/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/codegen/driver/main.cpp b/codegen/driver/main.cpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/device_gemm_multiple_d.hpp b/codegen/include/ck/host/device_gemm_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/device_gemm_multiple_d/operation.hpp b/codegen/include/ck/host/device_gemm_multiple_d/operation.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/device_gemm_multiple_d/problem.hpp b/codegen/include/ck/host/device_gemm_multiple_d/problem.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/headers.hpp b/codegen/include/ck/host/headers.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/operation/gemm.hpp b/codegen/include/ck/host/operation/gemm.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/stringutils.hpp b/codegen/include/ck/host/stringutils.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/types.hpp b/codegen/include/ck/host/types.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/utils.hpp b/codegen/include/ck/host/utils.hpp old mode 100644 new mode 100755 diff --git a/codegen/src/device_gemm_multiple_d.cpp b/codegen/src/device_gemm_multiple_d.cpp old mode 100644 new mode 100755 diff --git a/codegen/src/device_gemm_multiple_d_operation_xdl_cshuffle.cpp b/codegen/src/device_gemm_multiple_d_operation_xdl_cshuffle.cpp old mode 100644 new mode 100755 diff --git a/codegen/src/headers.cpp b/codegen/src/headers.cpp old mode 100644 new mode 100755 diff --git a/codegen/src/types.cpp b/codegen/src/types.cpp old mode 100644 new mode 100755 diff --git a/codegen/src/utils.cpp b/codegen/src/utils.cpp old mode 100644 new mode 100755 diff --git a/codegen/test/CMakeLists.txt b/codegen/test/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/codegen/test/gemm_multiple_d.cpp b/codegen/test/gemm_multiple_d.cpp old mode 100644 new mode 100755 diff --git a/codegen/test/include/test.hpp b/codegen/test/include/test.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/CMakeLists.txt b/codegen/test/rtc/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/include/rtc/compile_kernel.hpp b/codegen/test/rtc/include/rtc/compile_kernel.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/include/rtc/hip.hpp b/codegen/test/rtc/include/rtc/hip.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/include/rtc/kernel.hpp b/codegen/test/rtc/include/rtc/kernel.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/include/rtc/manage_ptr.hpp b/codegen/test/rtc/include/rtc/manage_ptr.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/include/rtc/tmp_dir.hpp b/codegen/test/rtc/include/rtc/tmp_dir.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/src/compile_kernel.cpp b/codegen/test/rtc/src/compile_kernel.cpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/src/hip.cpp b/codegen/test/rtc/src/hip.cpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/src/kernel.cpp b/codegen/test/rtc/src/kernel.cpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/src/tmp_dir.cpp b/codegen/test/rtc/src/tmp_dir.cpp old mode 100644 new mode 100755 diff --git a/dev-requirements.txt b/dev-requirements.txt old mode 100644 new mode 100755 diff --git a/docs/Contributors_Guide.rst b/docs/Contributors_Guide.rst old mode 100644 new mode 100755 diff --git a/docs/conceptual/what-is-ck.rst b/docs/conceptual/what-is-ck.rst old mode 100644 new mode 100755 diff --git a/docs/conf.py b/docs/conf.py old mode 100644 new mode 100755 diff --git a/docs/data/ck_component.png b/docs/data/ck_component.png old mode 100644 new mode 100755 diff --git a/docs/data/ck_layer.png b/docs/data/ck_layer.png old mode 100644 new mode 100755 diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile old mode 100644 new mode 100755 diff --git a/docs/index.rst b/docs/index.rst old mode 100644 new mode 100755 diff --git a/docs/install/dockerhub.rst b/docs/install/dockerhub.rst old mode 100644 new mode 100755 diff --git a/docs/license.rst b/docs/license.rst old mode 100644 new mode 100755 diff --git a/docs/reference/API_Reference_Guide.rst b/docs/reference/API_Reference_Guide.rst old mode 100644 new mode 100755 diff --git a/docs/reference/Supported_Primitives_Guide.rst b/docs/reference/Supported_Primitives_Guide.rst old mode 100644 new mode 100755 diff --git a/docs/reference/wrapper.rst b/docs/reference/wrapper.rst old mode 100644 new mode 100755 diff --git a/docs/refs.bib b/docs/refs.bib old mode 100644 new mode 100755 diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in old mode 100644 new mode 100755 diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in old mode 100644 new mode 100755 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt old mode 100644 new mode 100755 diff --git a/docs/tutorial/tutorial_hello_world.rst b/docs/tutorial/tutorial_hello_world.rst old mode 100644 new mode 100755 diff --git a/example/01_gemm/CMakeLists.txt b/example/01_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/01_gemm/README.md b/example/01_gemm/README.md old mode 100644 new mode 100755 diff --git a/example/01_gemm/common.hpp b/example/01_gemm/common.hpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_dl_fp16.cpp b/example/01_gemm/gemm_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_dl_fp32.cpp b/example/01_gemm/gemm_dl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_dl_int4.cpp b/example/01_gemm/gemm_dl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_dl_int8.cpp b/example/01_gemm/gemm_dl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_dpp_fp16.cpp b/example/01_gemm/gemm_dpp_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_wmma_fp16.cpp b/example/01_gemm/gemm_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_bf16.cpp b/example/01_gemm/gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_bf16_rtn.cpp b/example/01_gemm/gemm_xdl_bf16_rtn.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp16.cpp b/example/01_gemm/gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp16_fp8.cpp b/example/01_gemm/gemm_xdl_fp16_fp8.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp16_v2.cpp b/example/01_gemm/gemm_xdl_fp16_v2.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp64.cpp b/example/01_gemm/gemm_xdl_fp64.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp8.cpp b/example/01_gemm/gemm_xdl_fp8.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp8_bf8.cpp b/example/01_gemm/gemm_xdl_fp8_bf8.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_int4.cpp b/example/01_gemm/gemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_int8.cpp b/example/01_gemm/gemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_lds_direct_load_fp16.cpp b/example/01_gemm/gemm_xdl_lds_direct_load_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_lds_direct_load_fp32.cpp b/example/01_gemm/gemm_xdl_lds_direct_load_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp b/example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_streamk.cpp b/example/01_gemm/gemm_xdl_streamk.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_wavelet_fp16.cpp b/example/01_gemm/gemm_xdl_wavelet_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/run_gemm_example.inc b/example/01_gemm/run_gemm_example.inc old mode 100644 new mode 100755 diff --git a/example/02_gemm_bilinear/CMakeLists.txt b/example/02_gemm_bilinear/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/02_gemm_bilinear/README.md b/example/02_gemm_bilinear/README.md old mode 100644 new mode 100755 diff --git a/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp old mode 100644 new mode 100755 diff --git a/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp b/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/03_gemm_bias_relu/CMakeLists.txt b/example/03_gemm_bias_relu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/03_gemm_bias_relu/README.md b/example/03_gemm_bias_relu/README.md old mode 100644 new mode 100755 diff --git a/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp b/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/CMakeLists.txt b/example/04_gemm_add_add_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/README.md b/example/04_gemm_add_add_fastgelu/README.md old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/common.hpp b/example/04_gemm_add_add_fastgelu/common.hpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_lds_direct_load_fp32.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_lds_direct_load_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc b/example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/CMakeLists.txt b/example/09_convnd_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/README.md b/example/09_convnd_fwd/README.md old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_common.hpp b/example/09_convnd_fwd/convnd_fwd_common.hpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_dl_common.hpp b/example/09_convnd_fwd/convnd_fwd_dl_common.hpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp b/example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp b/example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_dl_int8.cpp b/example/09_convnd_fwd/convnd_fwd_dl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/run_convnd_fwd_dl_example.inc b/example/09_convnd_fwd/run_convnd_fwd_dl_example.inc old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/run_convnd_fwd_example.inc b/example/09_convnd_fwd/run_convnd_fwd_example.inc old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt b/example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc b/example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc old mode 100644 new mode 100755 diff --git a/example/12_reduce/CMakeLists.txt b/example/12_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/12_reduce/README.md b/example/12_reduce/README.md old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_blockwise.cpp b/example/12_reduce/reduce_blockwise.cpp old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_blockwise_impl.hpp b/example/12_reduce/reduce_blockwise_impl.hpp old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_blockwise_two_call.cpp b/example/12_reduce/reduce_blockwise_two_call.cpp old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_example_common.hpp b/example/12_reduce/reduce_example_common.hpp old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_multiblock_atomic_add.cpp b/example/12_reduce/reduce_multiblock_atomic_add.cpp old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_multiblock_atomic_add_impl.hpp b/example/12_reduce/reduce_multiblock_atomic_add_impl.hpp old mode 100644 new mode 100755 diff --git a/example/13_pool2d_fwd/CMakeLists.txt b/example/13_pool2d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/13_pool2d_fwd/README.md b/example/13_pool2d_fwd/README.md old mode 100644 new mode 100755 diff --git a/example/13_pool2d_fwd/pool2d_fwd_common.hpp b/example/13_pool2d_fwd/pool2d_fwd_common.hpp old mode 100644 new mode 100755 diff --git a/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp b/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp b/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/14_gemm_quantization/CMakeLists.txt b/example/14_gemm_quantization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/14_gemm_quantization/gemm_dl_quantization_int8.cpp b/example/14_gemm_quantization/gemm_dl_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp b/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp b/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/CMakeLists.txt b/example/15_grouped_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/README.md b/example/15_grouped_gemm/README.md old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/run_grouped_gemm_example.inc b/example/15_grouped_gemm/run_grouped_gemm_example.inc old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/CMakeLists.txt b/example/16_gemm_multi_d_multi_reduces/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp b/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp old mode 100644 new mode 100755 diff --git a/example/17_convnd_bwd_data/CMakeLists.txt b/example/17_convnd_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/17_convnd_bwd_data/README.md b/example/17_convnd_bwd_data/README.md old mode 100644 new mode 100755 diff --git a/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp b/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp old mode 100644 new mode 100755 diff --git a/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp b/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp b/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/18_batched_gemm_reduce/CMakeLists.txt b/example/18_batched_gemm_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp b/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/19_binary_elementwise/CMakeLists.txt b/example/19_binary_elementwise/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp b/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp old mode 100644 new mode 100755 diff --git a/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp b/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp old mode 100644 new mode 100755 diff --git a/example/19_binary_elementwise/elementwise_add_1d.cpp b/example/19_binary_elementwise/elementwise_add_1d.cpp old mode 100644 new mode 100755 diff --git a/example/19_binary_elementwise/elementwise_add_4d.cpp b/example/19_binary_elementwise/elementwise_add_4d.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/CMakeLists.txt b/example/20_grouped_conv_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/common.hpp b/example/20_grouped_conv_bwd_weight/common.hpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc b/example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc old mode 100644 new mode 100755 diff --git a/example/21_gemm_layernorm/CMakeLists.txt b/example/21_gemm_layernorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp b/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp b/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/CMakeLists.txt b/example/22_cgemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_bf16.cpp b/example/22_cgemm/cgemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_common.hpp b/example/22_cgemm/cgemm_xdl_common.hpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_fp16.cpp b/example/22_cgemm/cgemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_fp32.cpp b/example/22_cgemm/cgemm_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_int4.cpp b/example/22_cgemm/cgemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_int8.cpp b/example/22_cgemm/cgemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/23_softmax/CMakeLists.txt b/example/23_softmax/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/23_softmax/README.md b/example/23_softmax/README.md old mode 100644 new mode 100755 diff --git a/example/23_softmax/softmax_blockwise.cpp b/example/23_softmax/softmax_blockwise.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/CMakeLists.txt b/example/24_batched_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp b/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/batched_gemm_xdl_int4.cpp b/example/24_batched_gemm/batched_gemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/batched_gemm_xdl_int8.cpp b/example/24_batched_gemm/batched_gemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/run_batched_gemm_example.inc b/example/24_batched_gemm/run_batched_gemm_example.inc old mode 100644 new mode 100755 diff --git a/example/25_gemm_bias_e_permute/CMakeLists.txt b/example/25_gemm_bias_e_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/CMakeLists.txt b/example/26_contraction/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/26_contraction/README.md b/example/26_contraction/README.md old mode 100644 new mode 100755 diff --git a/example/26_contraction/common_instances.hpp b/example/26_contraction/common_instances.hpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_bf16_compute_fp32.cpp b/example/26_contraction/contraction_bilinear_xdl_bf16_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp16_compute_fp32.cpp b/example/26_contraction/contraction_bilinear_xdl_fp16_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp32.cpp b/example/26_contraction/contraction_bilinear_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp32_compute_bf16.cpp b/example/26_contraction/contraction_bilinear_xdl_fp32_compute_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp32_compute_fp16.cpp b/example/26_contraction/contraction_bilinear_xdl_fp32_compute_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp64.cpp b/example/26_contraction/contraction_bilinear_xdl_fp64.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp64_compute_fp32.cpp b/example/26_contraction/contraction_bilinear_xdl_fp64_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_bf16_compute_fp32.cpp b/example/26_contraction/contraction_scale_xdl_bf16_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp16_compute_fp32.cpp b/example/26_contraction/contraction_scale_xdl_fp16_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp32.cpp b/example/26_contraction/contraction_scale_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp32_compute_bf16.cpp b/example/26_contraction/contraction_scale_xdl_fp32_compute_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp32_compute_fp16.cpp b/example/26_contraction/contraction_scale_xdl_fp32_compute_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp64.cpp b/example/26_contraction/contraction_scale_xdl_fp64.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp64_compute_fp32.cpp b/example/26_contraction/contraction_scale_xdl_fp64_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/run_contraction_bilinear_example.inc b/example/26_contraction/run_contraction_bilinear_example.inc old mode 100644 new mode 100755 diff --git a/example/26_contraction/run_contraction_scale_example.inc b/example/26_contraction/run_contraction_scale_example.inc old mode 100644 new mode 100755 diff --git a/example/27_layernorm2d_fwd/CMakeLists.txt b/example/27_layernorm2d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/27_layernorm2d_fwd/common.hpp b/example/27_layernorm2d_fwd/common.hpp old mode 100644 new mode 100755 diff --git a/example/27_layernorm2d_fwd/layernorm2d_fwd_fp16.cpp b/example/27_layernorm2d_fwd/layernorm2d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/27_layernorm2d_fwd/layernorm2d_fwd_splitk_fp16.cpp b/example/27_layernorm2d_fwd/layernorm2d_fwd_splitk_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/27_layernorm2d_fwd/run_layernorm_example.inc b/example/27_layernorm2d_fwd/run_layernorm_example.inc old mode 100644 new mode 100755 diff --git a/example/28_grouped_gemm_bias_e_permute/CMakeLists.txt b/example/28_grouped_gemm_bias_e_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp b/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/29_batched_gemm_bias_e_permute/CMakeLists.txt b/example/29_batched_gemm_bias_e_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt b/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/README.md b/example/30_grouped_conv_fwd_multiple_d/README.md old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/common.hpp b/example/30_grouped_conv_fwd_multiple_d/common.hpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp b/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/CMakeLists.txt b/example/31_batched_gemm_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc b/example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt b/example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/cross_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/cross_attention_forward_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/grouped_query_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_query_attention_forward_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/multi_query_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/multi_query_attention_forward_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc b/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc b/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute_wmma.inc b/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute_wmma.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_cross_attention_wmma.inc b/example/32_batched_gemm_scale_softmax_gemm/run_cross_attention_wmma.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc b/example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_grouped_query_attention_forward_wmma.inc b/example/32_batched_gemm_scale_softmax_gemm/run_grouped_query_attention_forward_wmma.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_multi_query_attention_forward_wmma.inc b/example/32_batched_gemm_scale_softmax_gemm/run_multi_query_attention_forward_wmma.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_self_attention_wmma.inc b/example/32_batched_gemm_scale_softmax_gemm/run_self_attention_wmma.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/self_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/self_attention_forward_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/33_multiple_reduce/CMakeLists.txt b/example/33_multiple_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/33_multiple_reduce/README.md b/example/33_multiple_reduce/README.md old mode 100644 new mode 100755 diff --git a/example/33_multiple_reduce/dual_reduce_common.hpp b/example/33_multiple_reduce/dual_reduce_common.hpp old mode 100644 new mode 100755 diff --git a/example/33_multiple_reduce/dual_reduce_multiblock.cpp b/example/33_multiple_reduce/dual_reduce_multiblock.cpp old mode 100644 new mode 100755 diff --git a/example/33_multiple_reduce/dual_reduce_threadwise.cpp b/example/33_multiple_reduce/dual_reduce_threadwise.cpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/CMakeLists.txt b/example/34_batchnorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/README.md b/example/34_batchnorm/README.md old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_backward_nhwc.cpp b/example/34_batchnorm/batchnorm_backward_nhwc.cpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_common.hpp b/example/34_batchnorm/batchnorm_common.hpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp b/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp b/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp b/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_infer_impl.hpp b/example/34_batchnorm/batchnorm_infer_impl.hpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/CMakeLists.txt b/example/35_splitK_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/run_splitK_gemm_example.inc b/example/35_splitK_gemm/run_splitK_gemm_example.inc old mode 100644 new mode 100755 index e3690984abc08cff2e5a450f9f6b6ce13217b883..944377a4c4c043e8f3d6e19bd88fc04360c9ab5c --- a/example/35_splitK_gemm/run_splitK_gemm_example.inc +++ b/example/35_splitK_gemm/run_splitK_gemm_example.inc @@ -60,10 +60,14 @@ bool run_splitK_gemm(const ProblemSize& problem_size, const ExecutionConfig& con case 1: a_m_k.GenerateTensorValue(GeneratorTensor_2{-5, 5}); b_k_n.GenerateTensorValue(GeneratorTensor_2{-5, 5}); + //a_m_k.GenerateTensorValue_control_entropy(GeneratorTensor_2); + //b_k_n.GenerateTensorValue_control_entropy(GeneratorTensor_2{-5, 5}); break; case 2: a_m_k.GenerateTensorValue(GeneratorTensor_3{0.0, 1.0}); b_k_n.GenerateTensorValue(GeneratorTensor_3{-0.5, 0.5}); + //a_m_k.GenerateTensorValue(GeneratorTensor_3_control_entropy{}); + //b_k_n.GenerateTensorValue(GeneratorTensor_3_control_entropy{}); break; default: a_m_k.GenerateTensorValue(GeneratorTensor_Sequential<0>{}); diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_fp16_fp8.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp16_fp8.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_lds_direct_load_fp16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_lds_direct_load_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/36_sparse_embedding/CMakeLists.txt b/example/36_sparse_embedding/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp b/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp old mode 100644 new mode 100755 diff --git a/example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt b/example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp b/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt b/example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/common.hpp b/example/38_grouped_conv_bwd_data_multiple_d/common.hpp old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc b/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc b/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc old mode 100644 new mode 100755 diff --git a/example/39_permute/CMakeLists.txt b/example/39_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/39_permute/common.hpp b/example/39_permute/common.hpp old mode 100644 new mode 100755 diff --git a/example/39_permute/permute_1xHxW_fp16.cpp b/example/39_permute/permute_1xHxW_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/39_permute/permute_HxWx4_fp16.cpp b/example/39_permute/permute_HxWx4_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/39_permute/permute_NxHxW_fp16.cpp b/example/39_permute/permute_NxHxW_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/39_permute/run_permute_bundle_example.inc b/example/39_permute/run_permute_bundle_example.inc old mode 100644 new mode 100755 diff --git a/example/39_permute/run_permute_element_example.inc b/example/39_permute/run_permute_element_example.inc old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/CMakeLists.txt b/example/40_conv2d_fwd_quantization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/common.hpp b/example/40_conv2d_fwd_quantization/common.hpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/CMakeLists.txt b/example/41_grouped_conv_conv_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc b/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/CMakeLists.txt b/example/42_groupnorm_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/common.hpp b/example/42_groupnorm_fwd/common.hpp old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/groupnorm_fwd_sigmoid_mul_fp16.cpp b/example/42_groupnorm_fwd/groupnorm_fwd_sigmoid_mul_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/groupnorm_fwd_splitk_fp16.cpp b/example/42_groupnorm_fwd/groupnorm_fwd_splitk_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/groupnorm_fwd_swish_fp16.cpp b/example/42_groupnorm_fwd/groupnorm_fwd_swish_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc b/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc old mode 100644 new mode 100755 diff --git a/example/43_splitk_gemm_bias_e_permute/CMakeLists.txt b/example/43_splitk_gemm_bias_e_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/CMakeLists.txt b/example/44_elementwise_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute.cpp b/example/44_elementwise_permute/elementwise_permute.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_3d.cpp b/example/44_elementwise_permute/elementwise_permute_3d.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp old mode 100644 new mode 100755 diff --git a/example/45_elementwise_normalization/CMakeLists.txt b/example/45_elementwise_normalization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp b/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/CMakeLists.txt b/example/46_gemm_add_multiply/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/README.md b/example/46_gemm_add_multiply/README.md old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/common.hpp b/example/46_gemm_add_multiply/common.hpp old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp b/example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp b/example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc b/example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc old mode 100644 new mode 100755 diff --git a/example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt b/example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp b/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp old mode 100644 new mode 100755 diff --git a/example/48_pool3d_fwd/CMakeLists.txt b/example/48_pool3d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/48_pool3d_fwd/pool3d_fwd_common.hpp b/example/48_pool3d_fwd/pool3d_fwd_common.hpp old mode 100644 new mode 100755 diff --git a/example/48_pool3d_fwd/pool3d_fwd_fp16.cpp b/example/48_pool3d_fwd/pool3d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/49_maxpool2d_bwd/CMakeLists.txt b/example/49_maxpool2d_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp old mode 100644 new mode 100755 diff --git a/example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/50_put_element/CMakeLists.txt b/example/50_put_element/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/50_put_element/put_element_fp16.cpp b/example/50_put_element/put_element_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/51_avgpool3d_bwd/CMakeLists.txt b/example/51_avgpool3d_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp old mode 100644 new mode 100755 diff --git a/example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/52_im2col_col2im/CMakeLists.txt b/example/52_im2col_col2im/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/52_im2col_col2im/column_to_image_f32.cpp b/example/52_im2col_col2im/column_to_image_f32.cpp old mode 100644 new mode 100755 diff --git a/example/52_im2col_col2im/common.hpp b/example/52_im2col_col2im/common.hpp old mode 100644 new mode 100755 diff --git a/example/52_im2col_col2im/image_to_column_f32.cpp b/example/52_im2col_col2im/image_to_column_f32.cpp old mode 100644 new mode 100755 diff --git a/example/53_layernorm2d_bwd/CMakeLists.txt b/example/53_layernorm2d_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/53_layernorm2d_bwd/layernorm2d_bwd_fp32.cpp b/example/53_layernorm2d_bwd/layernorm2d_bwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/54_groupnorm_bwd/CMakeLists.txt b/example/54_groupnorm_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/54_groupnorm_bwd/groupnorm_bwd_fp32.cpp b/example/54_groupnorm_bwd/groupnorm_bwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/60_gemm_multi_ABD/CMakeLists.txt b/example/60_gemm_multi_ABD/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/61_contraction_multi_ABD/CMakeLists.txt b/example/61_contraction_multi_ABD/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp b/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/CMakeLists.txt b/example/62_convnd_activ/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/binary/CMakeLists.txt b/example/62_convnd_activ/binary/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/binary/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp b/example/62_convnd_activ/binary/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/binary/convnd_fwd_xdl_bilinear_residual_fp16.cpp b/example/62_convnd_activ/binary/convnd_fwd_xdl_bilinear_residual_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp b/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp b/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/CMakeLists.txt b/example/62_convnd_activ/multi_AB/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_bf16.cpp b/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp16.cpp b/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp32.cpp b/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_int8.cpp b/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_int8.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp b/example/62_convnd_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/run_convnd_activ_example.inc b/example/62_convnd_activ/run_convnd_activ_example.inc old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/CMakeLists.txt b/example/62_convnd_activ/unary/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_activ_unary_common.hpp b/example/62_convnd_activ/unary/convnd_fwd_activ_unary_common.hpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_abs_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_abs_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_clippedrelu_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_clippedrelu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_elu_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_elu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_leakyrelu_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_leakyrelu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_pow_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_pow_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_relu_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_relu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_sigmoid_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_sigmoid_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_softrelu_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_softrelu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_tanh_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_tanh_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/63_layernorm4d_fwd/CMakeLists.txt b/example/63_layernorm4d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/63_layernorm4d_fwd/common.hpp b/example/63_layernorm4d_fwd/common.hpp old mode 100644 new mode 100755 diff --git a/example/63_layernorm4d_fwd/layernorm4d_fwd_fp16.cpp b/example/63_layernorm4d_fwd/layernorm4d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/63_layernorm4d_fwd/layernorm4d_fwd_splitk_fp16.cpp b/example/63_layernorm4d_fwd/layernorm4d_fwd_splitk_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/63_layernorm4d_fwd/run_layernorm4d_fwd_example.inc b/example/63_layernorm4d_fwd/run_layernorm4d_fwd_example.inc old mode 100644 new mode 100755 diff --git a/example/64_fpAintB_gemm/CMakeLists.txt b/example/64_fpAintB_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/64_fpAintB_gemm/common.hpp b/example/64_fpAintB_gemm/common.hpp old mode 100644 new mode 100755 diff --git a/example/64_fpAintB_gemm/fp16int8_gemm_wmma.cpp b/example/64_fpAintB_gemm/fp16int8_gemm_wmma.cpp old mode 100644 new mode 100755 diff --git a/example/64_fpAintB_gemm/run_gemm_example.inc b/example/64_fpAintB_gemm/run_gemm_example.inc old mode 100644 new mode 100755 diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/include/ck/ck.hpp b/include/ck/ck.hpp old mode 100644 new mode 100755 diff --git a/include/ck/config.h.in b/include/ck/config.h.in old mode 100644 new mode 100755 diff --git a/include/ck/host_utility/device_prop.hpp b/include/ck/host_utility/device_prop.hpp old mode 100644 new mode 100755 diff --git a/include/ck/host_utility/hip_check_error.hpp b/include/ck/host_utility/hip_check_error.hpp old mode 100644 new mode 100755 diff --git a/include/ck/host_utility/io.hpp b/include/ck/host_utility/io.hpp old mode 100644 new mode 100755 diff --git a/include/ck/host_utility/kernel_launch.hpp b/include/ck/host_utility/kernel_launch.hpp old mode 100644 new mode 100755 diff --git a/include/ck/host_utility/stream_utility.hpp b/include/ck/host_utility/stream_utility.hpp old mode 100644 new mode 100755 diff --git a/include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp b/include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/stream_config.hpp b/include/ck/stream_config.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor/static_tensor.hpp b/include/ck/tensor/static_tensor.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/cluster_descriptor.hpp b/include/ck/tensor_description/cluster_descriptor.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/multi_index_transform.hpp b/include/ck/tensor_description/multi_index_transform.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/multi_index_transform_helper.hpp b/include/ck/tensor_description/multi_index_transform_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/tensor_adaptor.hpp b/include/ck/tensor_description/tensor_adaptor.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/tensor_descriptor.hpp b/include/ck/tensor_description/tensor_descriptor.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/tensor_descriptor_helper.hpp b/include/ck/tensor_description/tensor_descriptor_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/tensor_space_filling_curve.hpp b/include/ck/tensor_description/tensor_space_filling_curve.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp b/include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp b/include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp b/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp b/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_direct_load.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1_dequant.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1_dequant.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp b/include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp b/include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_base.hpp b/include/ck/tensor_operation/gpu/device/device_base.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_cgemm.hpp b/include/ck/tensor_operation/gpu/device/device_cgemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp b/include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp b/include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp b/include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_elementwise.hpp b/include/ck/tensor_operation/gpu/device/device_elementwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp b/include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_elementwise_scale.hpp b/include/ck/tensor_operation/gpu/device/device_elementwise_scale.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_dequantB.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_dequantB.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_abd.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_abd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp b/include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_normalization_bwd_data.hpp b/include/ck/tensor_operation/gpu/device/device_normalization_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_normalization_bwd_gamma_beta.hpp b/include/ck/tensor_operation/gpu/device/device_normalization_bwd_gamma_beta.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_normalization_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_normalization_fwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_permute.hpp b/include/ck/tensor_operation/gpu/device/device_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_put_element.hpp b/include/ck/tensor_operation/gpu/device/device_put_element.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_reduce.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_softmax.hpp b/include/ck/tensor_operation/gpu/device/device_softmax.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/gemm_specialization.hpp b/include/ck/tensor_operation/gpu/device/gemm_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_contraction_utils.hpp b/include/ck/tensor_operation/gpu/device/impl/device_contraction_utils.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_fpAintB_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/device/impl/device_fpAintB_gemm_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_lds_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_v2.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_v2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_query_attention_forward_wmma.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_query_attention_forward_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_multi_query_attention_forward_wmma.hpp b/include/ck/tensor_operation/gpu/device/impl/device_multi_query_attention_forward_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp b/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp b/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_normalization_bwd_data_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_bwd_data_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_normalization_bwd_gamma_beta_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_bwd_gamma_beta_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_normalization_fwd_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_normalization_fwd_splitk_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_fwd_splitk_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp b/include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/masking_specialization.hpp b/include/ck/tensor_operation/gpu/device/masking_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/matrix_padder.hpp b/include/ck/tensor_operation/gpu/device/matrix_padder.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp b/include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/tensor_layout.hpp b/include/ck/tensor_operation/gpu/device/tensor_layout.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/tensor_specialization.hpp b/include/ck/tensor_operation/gpu/device/tensor_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/welford_helper.hpp b/include/ck/tensor_operation/gpu/device/welford_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/element/quantization_operation.hpp b/include/ck/tensor_operation/gpu/element/quantization_operation.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp b/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp b/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_scale.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_scale.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_3d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_3d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_fpAintB_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_fpAintB_gemm_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v4_direct_load.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v4_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp old mode 100644 new mode 100755 index b52f5c51b1ad267f0b808e283a3fa40ba5de4532..fb89b45f1e1ae0910eaa65b79708fab96ac6c783 --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp @@ -27,13 +27,13 @@ template __global__ void #if CK_USE_LAUNCH_BOUNDS - __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU) +__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU) #endif - kernel_gemm_xdlops_v2r4r2_simplified(typename GridwiseGemm::Argument karg, - const Block2CTileMap& b2c_map, - const AElementwiseOperation a_element_op, - const BElementwiseOperation b_element_op, - const CElementwiseOperation c_element_op) + kernel_gemm_xdlops_v2r4r2_simplified(typename GridwiseGemm::Argument karg, + const Block2CTileMap& b2c_map, + const AElementwiseOperation a_element_op, + const BElementwiseOperation b_element_op, + const CElementwiseOperation c_element_op) { #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \ defined(__gfx94__)) @@ -669,433 +669,450 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2 const BElementwiseOperation b_element_op = BElementwiseOperation{}, const CElementwiseOperation c_element_op = CElementwiseOperation{}) { - const FloatA* p_a_grid = karg.p_a_grid; - const FloatB* p_b_grid = karg.p_b_grid; - FloatC* p_c_grid = karg.p_c_grid; - const auto a_b_k0_m_k1_grid_desc = MakeAGridDescriptor_KBatch_K0_M_K1( - karg.M, karg.MPadded, karg.K, karg.StrideA, karg.k_batch, karg.K0Padded, karg.KPadded); - const auto b_b_k0_n_k1_grid_desc = MakeBGridDescriptor_KBatch_K0_N_K1( - karg.K, karg.NPadded, karg.N, karg.StrideB, karg.k_batch, karg.K0Padded, karg.KPadded); - const auto c_grid_desc_m_n = MakeCGridDescriptor_M_N(karg.M, karg.N, karg.StrideC); - - const auto c_grid_desc_mblock_mperblock_nblock_nperblock = - MakeCGridDesc_MBlock_MPerBlock_NBlock_NPerBlock(c_grid_desc_m_n); - - const auto a_grid_buf = make_dynamic_buffer( - p_a_grid, a_b_k0_m_k1_grid_desc.GetElementSpaceSize()); - const auto b_grid_buf = make_dynamic_buffer( - p_b_grid, b_b_k0_n_k1_grid_desc.GetElementSpaceSize()); - auto c_grid_buf = make_dynamic_buffer( - p_c_grid, c_grid_desc_mblock_mperblock_nblock_nperblock.GetElementSpaceSize()); - - // divide block work by [KBatch, M, N] - const auto block_work_idx = - block_2_ctile_map.CalculateBottomIndex(make_multi_index(get_block_1d_id())); - - if(!block_2_ctile_map.ValidCTileIndex( - block_work_idx, - make_tuple(c_grid_desc_mblock_mperblock_nblock_nperblock.GetLength(I0), - c_grid_desc_mblock_mperblock_nblock_nperblock.GetLength(I2)))) + for(auto i = 0; i < 1500; i++) { - return; - } + const FloatA* p_a_grid = karg.p_a_grid; + const FloatB* p_b_grid = karg.p_b_grid; + FloatC* p_c_grid = karg.p_c_grid; + const auto a_b_k0_m_k1_grid_desc = MakeAGridDescriptor_KBatch_K0_M_K1(karg.M, + karg.MPadded, + karg.K, + karg.StrideA, + karg.k_batch, + karg.K0Padded, + karg.KPadded); + const auto b_b_k0_n_k1_grid_desc = MakeBGridDescriptor_KBatch_K0_N_K1(karg.K, + karg.NPadded, + karg.N, + karg.StrideB, + karg.k_batch, + karg.K0Padded, + karg.KPadded); + const auto c_grid_desc_m_n = MakeCGridDescriptor_M_N(karg.M, karg.N, karg.StrideC); + + const auto c_grid_desc_mblock_mperblock_nblock_nperblock = + MakeCGridDesc_MBlock_MPerBlock_NBlock_NPerBlock(c_grid_desc_m_n); + + const auto a_grid_buf = make_dynamic_buffer( + p_a_grid, a_b_k0_m_k1_grid_desc.GetElementSpaceSize()); + const auto b_grid_buf = make_dynamic_buffer( + p_b_grid, b_b_k0_n_k1_grid_desc.GetElementSpaceSize()); + auto c_grid_buf = make_dynamic_buffer( + p_c_grid, c_grid_desc_mblock_mperblock_nblock_nperblock.GetElementSpaceSize()); + + // divide block work by [KBatch, M, N] + const auto block_work_idx = + block_2_ctile_map.CalculateBottomIndex(make_multi_index(get_block_1d_id())); + + if(!block_2_ctile_map.ValidCTileIndex( + block_work_idx, + make_tuple(c_grid_desc_mblock_mperblock_nblock_nperblock.GetLength(I0), + c_grid_desc_mblock_mperblock_nblock_nperblock.GetLength(I2)))) + { + return; + } - const index_t block_m_id = __builtin_amdgcn_readfirstlane(block_work_idx[I1]); - const index_t block_n_id = __builtin_amdgcn_readfirstlane(block_work_idx[I2]); - const index_t k_batch_id = __builtin_amdgcn_readfirstlane(block_work_idx[I0]); + const index_t block_m_id = __builtin_amdgcn_readfirstlane(block_work_idx[I1]); + const index_t block_n_id = __builtin_amdgcn_readfirstlane(block_work_idx[I2]); + const index_t k_batch_id = __builtin_amdgcn_readfirstlane(block_work_idx[I0]); - // HACK: this force m/n_block_data_idx_on_grid into SGPR - const index_t m_block_data_idx_on_grid = - __builtin_amdgcn_readfirstlane(block_m_id * MPerBlock); + // HACK: this force m/n_block_data_idx_on_grid into SGPR + const index_t m_block_data_idx_on_grid = + __builtin_amdgcn_readfirstlane(block_m_id * MPerBlock); - const index_t n_block_data_idx_on_grid = - __builtin_amdgcn_readfirstlane(block_n_id * NPerBlock); + const index_t n_block_data_idx_on_grid = + __builtin_amdgcn_readfirstlane(block_n_id * NPerBlock); - // lds max alignment - constexpr auto max_lds_align = K1; + // lds max alignment + constexpr auto max_lds_align = K1; - // A matrix in LDS memory, dst of blockwise copy - constexpr auto a_k0_m_k1_block_desc = [&]() { - if constexpr(ABlockLdsExtraM) - { - return make_naive_tensor_descriptor( - make_tuple(Number{}, Number{}, K1), - make_tuple(Number{} * K1, K1, I1)); - } - else - { - return make_naive_tensor_descriptor_aligned( - make_tuple(Number{}, Number{}, K1), max_lds_align); - } - }(); + // A matrix in LDS memory, dst of blockwise copy + constexpr auto a_k0_m_k1_block_desc = [&]() { + if constexpr(ABlockLdsExtraM) + { + return make_naive_tensor_descriptor( + make_tuple(Number{}, Number{}, K1), + make_tuple(Number{} * K1, K1, I1)); + } + else + { + return make_naive_tensor_descriptor_aligned( + make_tuple(Number{}, Number{}, K1), max_lds_align); + } + }(); - constexpr auto a_b_k0_m_k1_block_desc = [&]() { - if constexpr(ABlockLdsExtraM) - { - return make_naive_tensor_descriptor( - make_tuple(Number<1>{}, Number{}, Number{}, K1), - make_tuple(Number{} * Number{} * K1, - Number{} * K1, - K1, - I1)); - } - else - { - return make_naive_tensor_descriptor_aligned( - make_tuple(Number<1>{}, Number{}, Number{}, K1), - max_lds_align); - } - }(); - // B matrix in LDS memory, dst of blockwise copy - constexpr auto b_k0_n_k1_block_desc = [&]() { - if constexpr(BBlockLdsExtraN) - { - return make_naive_tensor_descriptor( - make_tuple(Number{}, Number{}, K1), - make_tuple(Number{} * K1, K1, I1)); - } - else - { - return make_naive_tensor_descriptor_aligned( - make_tuple(Number{}, Number{}, K1), max_lds_align); - } - }(); + constexpr auto a_b_k0_m_k1_block_desc = [&]() { + if constexpr(ABlockLdsExtraM) + { + return make_naive_tensor_descriptor( + make_tuple(Number<1>{}, Number{}, Number{}, K1), + make_tuple(Number{} * Number{} * K1, + Number{} * K1, + K1, + I1)); + } + else + { + return make_naive_tensor_descriptor_aligned( + make_tuple(Number<1>{}, Number{}, Number{}, K1), + max_lds_align); + } + }(); + // B matrix in LDS memory, dst of blockwise copy + constexpr auto b_k0_n_k1_block_desc = [&]() { + if constexpr(BBlockLdsExtraN) + { + return make_naive_tensor_descriptor( + make_tuple(Number{}, Number{}, K1), + make_tuple(Number{} * K1, K1, I1)); + } + else + { + return make_naive_tensor_descriptor_aligned( + make_tuple(Number{}, Number{}, K1), max_lds_align); + } + }(); - constexpr auto b_b_k0_n_k1_block_desc = [&]() { - if constexpr(BBlockLdsExtraN) - { - return make_naive_tensor_descriptor( - make_tuple(Number<1>{}, Number{}, Number{}, K1), - make_tuple(Number{} * Number{} * K1, - Number{} * K1, - K1, - I1)); - } - else + constexpr auto b_b_k0_n_k1_block_desc = [&]() { + if constexpr(BBlockLdsExtraN) + { + return make_naive_tensor_descriptor( + make_tuple(Number<1>{}, Number{}, Number{}, K1), + make_tuple(Number{} * Number{} * K1, + Number{} * K1, + K1, + I1)); + } + else + { + return make_naive_tensor_descriptor_aligned( + make_tuple(Number<1>{}, Number{}, Number{}, K1), + max_lds_align); + } + }(); + // A matrix blockwise copy + auto a_blockwise_copy = + ThreadGroupTensorSliceTransfer_v4r1, + ABlockTransferThreadClusterLengths_K0_M_K1, + ABlockTransferThreadClusterArrangeOrder, + FloatA, + LDSTypeA, + decltype(a_b_k0_m_k1_grid_desc), + decltype(a_b_k0_m_k1_block_desc), + ABlockTransferSrcAccessOrder, + Sequence<0, 2, 1, 3>, + ABlockTransferSrcVectorDim, + 3, + ABlockTransferSrcScalarPerVector, + ABlockTransferDstScalarPerVector_K1, + 1, + 1, + AThreadTransferSrcResetCoordinateAfterRun, + true>( + a_b_k0_m_k1_grid_desc, + make_multi_index(k_batch_id, 0, m_block_data_idx_on_grid, 0), + a_element_op, + a_b_k0_m_k1_block_desc, + make_multi_index(0, 0, 0, 0), + ck::tensor_operation::element_wise::PassThrough{}); + + // B matrix blockwise copy + auto b_blockwise_copy = + ThreadGroupTensorSliceTransfer_v4r1, + BBlockTransferThreadClusterLengths_K0_N_K1, + BBlockTransferThreadClusterArrangeOrder, + FloatB, + LDSTypeB, + decltype(b_b_k0_n_k1_grid_desc), + decltype(b_b_k0_n_k1_block_desc), + BBlockTransferSrcAccessOrder, + Sequence<0, 2, 1, 3>, + BBlockTransferSrcVectorDim, + 3, + BBlockTransferSrcScalarPerVector, + BBlockTransferDstScalarPerVector_K1, + 1, + 1, + BThreadTransferSrcResetCoordinateAfterRun, + true>( + b_b_k0_n_k1_grid_desc, + make_multi_index(k_batch_id, 0, n_block_data_idx_on_grid, 0), + b_element_op, + b_b_k0_n_k1_block_desc, + make_multi_index(0, 0, 0, 0), + ck::tensor_operation::element_wise::PassThrough{}); + + // GEMM definition + // c_mtx += transpose(a_mtx) * b_mtx + // a_mtx[K0PerBlock, MPerBlock] is in LDS + // b_mtx[K0PerBlock, NPerBlock] is in LDS + // c_mtx[MPerBlock, NPerBlock] is distributed among threads, and saved in + // register + // sanity check + + auto blockwise_gemm = BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_Selector< + BlockSize, + LDSTypeA, + LDSTypeB, + FloatAcc, + decltype(a_k0_m_k1_block_desc), + decltype(b_k0_n_k1_block_desc), + MPerXDL, + NPerXDL, + MRepeat, + NRepeat, + K1, + LoopSched, + ComputeTypeA, + ComputeTypeB>(); + + auto c_thread_buf = blockwise_gemm.GetCThreadBuffer(); + + // LDS allocation for A and B: be careful of alignment + constexpr auto a_block_space_size = math::integer_least_multiple( + a_k0_m_k1_block_desc.GetElementSpaceSize(), max_lds_align); + + auto p_a_block = reinterpret_cast(p_shared_block); + auto p_b_block = reinterpret_cast(p_a_block + a_block_space_size); + + constexpr auto a_block_slice_copy_step = make_multi_index(0, K0PerBlock, 0, 0); + constexpr auto b_block_slice_copy_step = make_multi_index(0, K0PerBlock, 0, 0); + + auto a_block_buf = make_dynamic_buffer( + p_a_block, a_k0_m_k1_block_desc.GetElementSpaceSize()); + auto b_block_buf = make_dynamic_buffer( + p_b_block, b_k0_n_k1_block_desc.GetElementSpaceSize()); + + // gridwise GEMM pipeline + const index_t num_k_block_main_loop = __builtin_amdgcn_readfirstlane( + (a_b_k0_m_k1_grid_desc.GetLength(I1) * a_b_k0_m_k1_grid_desc.GetLength(I3)) / + (K0PerBlock * K1)); + + const auto gridwise_gemm_pipeline = GridwiseGemmPipe{}; + + gridwise_gemm_pipeline.template Run(a_b_k0_m_k1_grid_desc, + a_b_k0_m_k1_block_desc, + a_blockwise_copy, + a_grid_buf, + a_block_buf, + a_block_slice_copy_step, + b_b_k0_n_k1_grid_desc, + b_b_k0_n_k1_block_desc, + b_blockwise_copy, + b_grid_buf, + b_block_buf, + b_block_slice_copy_step, + blockwise_gemm, + c_thread_buf, + num_k_block_main_loop); + + // output: register to global memory { - return make_naive_tensor_descriptor_aligned( - make_tuple(Number<1>{}, Number{}, Number{}, K1), - max_lds_align); - } - }(); - // A matrix blockwise copy - auto a_blockwise_copy = - ThreadGroupTensorSliceTransfer_v4r1, - ABlockTransferThreadClusterLengths_K0_M_K1, - ABlockTransferThreadClusterArrangeOrder, - FloatA, - LDSTypeA, - decltype(a_b_k0_m_k1_grid_desc), - decltype(a_b_k0_m_k1_block_desc), - ABlockTransferSrcAccessOrder, - Sequence<0, 2, 1, 3>, - ABlockTransferSrcVectorDim, - 3, - ABlockTransferSrcScalarPerVector, - ABlockTransferDstScalarPerVector_K1, - 1, - 1, - AThreadTransferSrcResetCoordinateAfterRun, - true>( - a_b_k0_m_k1_grid_desc, - make_multi_index(k_batch_id, 0, m_block_data_idx_on_grid, 0), - a_element_op, - a_b_k0_m_k1_block_desc, - make_multi_index(0, 0, 0, 0), - ck::tensor_operation::element_wise::PassThrough{}); - - // B matrix blockwise copy - auto b_blockwise_copy = - ThreadGroupTensorSliceTransfer_v4r1, - BBlockTransferThreadClusterLengths_K0_N_K1, - BBlockTransferThreadClusterArrangeOrder, - FloatB, - LDSTypeB, - decltype(b_b_k0_n_k1_grid_desc), - decltype(b_b_k0_n_k1_block_desc), - BBlockTransferSrcAccessOrder, - Sequence<0, 2, 1, 3>, - BBlockTransferSrcVectorDim, - 3, - BBlockTransferSrcScalarPerVector, - BBlockTransferDstScalarPerVector_K1, - 1, - 1, - BThreadTransferSrcResetCoordinateAfterRun, - true>( - b_b_k0_n_k1_grid_desc, - make_multi_index(k_batch_id, 0, n_block_data_idx_on_grid, 0), - b_element_op, - b_b_k0_n_k1_block_desc, - make_multi_index(0, 0, 0, 0), - ck::tensor_operation::element_wise::PassThrough{}); - - // GEMM definition - // c_mtx += transpose(a_mtx) * b_mtx - // a_mtx[K0PerBlock, MPerBlock] is in LDS - // b_mtx[K0PerBlock, NPerBlock] is in LDS - // c_mtx[MPerBlock, NPerBlock] is distributed among threads, and saved in - // register - // sanity check - - auto blockwise_gemm = BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_Selector< - BlockSize, - LDSTypeA, - LDSTypeB, - FloatAcc, - decltype(a_k0_m_k1_block_desc), - decltype(b_k0_n_k1_block_desc), - MPerXDL, - NPerXDL, - MRepeat, - NRepeat, - K1, - LoopSched, - ComputeTypeA, - ComputeTypeB>(); - - auto c_thread_buf = blockwise_gemm.GetCThreadBuffer(); - - // LDS allocation for A and B: be careful of alignment - constexpr auto a_block_space_size = - math::integer_least_multiple(a_k0_m_k1_block_desc.GetElementSpaceSize(), max_lds_align); - - auto p_a_block = reinterpret_cast(p_shared_block); - auto p_b_block = reinterpret_cast(p_a_block + a_block_space_size); - - constexpr auto a_block_slice_copy_step = make_multi_index(0, K0PerBlock, 0, 0); - constexpr auto b_block_slice_copy_step = make_multi_index(0, K0PerBlock, 0, 0); - - auto a_block_buf = make_dynamic_buffer( - p_a_block, a_k0_m_k1_block_desc.GetElementSpaceSize()); - auto b_block_buf = make_dynamic_buffer( - p_b_block, b_k0_n_k1_block_desc.GetElementSpaceSize()); - - // gridwise GEMM pipeline - const index_t num_k_block_main_loop = __builtin_amdgcn_readfirstlane( - (a_b_k0_m_k1_grid_desc.GetLength(I1) * a_b_k0_m_k1_grid_desc.GetLength(I3)) / - (K0PerBlock * K1)); - - const auto gridwise_gemm_pipeline = GridwiseGemmPipe{}; - - gridwise_gemm_pipeline.template Run(a_b_k0_m_k1_grid_desc, - a_b_k0_m_k1_block_desc, - a_blockwise_copy, - a_grid_buf, - a_block_buf, - a_block_slice_copy_step, - b_b_k0_n_k1_grid_desc, - b_b_k0_n_k1_block_desc, - b_blockwise_copy, - b_grid_buf, - b_block_buf, - b_block_slice_copy_step, - blockwise_gemm, - c_thread_buf, - num_k_block_main_loop); - - // output: register to global memory - { - constexpr index_t MWave = MPerBlock / (MRepeat * MPerXDL); - constexpr index_t NWave = NPerBlock / (NRepeat * NPerXDL); - - constexpr auto c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc = - blockwise_gemm.GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(); - - constexpr auto c_m0_n0_m1_n1_m2_m3_m4_n2_thread_desc = - blockwise_gemm.GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(); - - constexpr auto M0 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I0); - constexpr auto N0 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I1); - constexpr auto M1 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I2); - constexpr auto N1 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I3); - constexpr auto M2 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I4); - constexpr auto M3 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I5); - constexpr auto M4 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I6); - constexpr auto N2 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I7); - - constexpr auto c_block_desc_mblock_mperblock_nblock_nperblock = - GetCBlockDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(); - - auto c_block_buf = make_dynamic_buffer( - static_cast(p_shared_block), - c_block_desc_mblock_mperblock_nblock_nperblock.GetElementSpaceSize()); - - constexpr auto c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2 = transform_tensor_descriptor( - c_block_desc_mblock_mperblock_nblock_nperblock, - make_tuple( - make_freeze_transform(I0), // freeze mblock - make_unmerge_transform(make_tuple(CShuffleMRepeatPerShuffle, - M1, - M2, - M3, - M4)), // M1 = MWave, M2 * M3 * M4 = MPerXDL - make_freeze_transform(I0), // freeze nblock - make_unmerge_transform(make_tuple(CShuffleNRepeatPerShuffle, - N1, - N2))), // M1 = MWave, M2 * M3 * M4 = MPerXDL - make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}), - make_tuple( - Sequence<>{}, Sequence<0, 2, 4, 5, 6>{}, Sequence<>{}, Sequence<1, 3, 7>{})); - - // calculate origin of thread output tensor on global memory - // blockwise GEMM c matrix starting index - const auto c_thread_mtx_on_block = - blockwise_gemm.CalculateCThreadOriginDataIndex(I0, I0, I0, I0); - - const index_t m_thread_data_on_block = c_thread_mtx_on_block[I0]; - const index_t n_thread_data_on_block = c_thread_mtx_on_block[I1]; - - const auto m_thread_data_on_block_to_m0_m1_m2_m3_m4_adaptor = - make_single_stage_tensor_adaptor( - make_tuple(make_merge_transform(make_tuple(M0, M1, M2, M3, M4))), - make_tuple(Sequence<0, 1, 2, 3, 4>{}), - make_tuple(Sequence<0>{})); - - const auto m_thread_data_on_block_idx = - m_thread_data_on_block_to_m0_m1_m2_m3_m4_adaptor.CalculateBottomIndex( - make_multi_index(m_thread_data_on_block)); - - const auto n_thread_data_on_block_to_n0_n1_n2_adaptor = - make_single_stage_tensor_adaptor( - make_tuple(make_merge_transform(make_tuple(N0, N1, N2))), - make_tuple(Sequence<0, 1, 2>{}), - make_tuple(Sequence<0>{})); - - const auto n_thread_data_on_block_idx = - n_thread_data_on_block_to_n0_n1_n2_adaptor.CalculateBottomIndex( - make_multi_index(n_thread_data_on_block)); - - // VGPR to LDS - auto c_thread_copy_vgpr_to_lds = - ThreadwiseTensorSliceTransfer_v1r3, - Sequence<0, 1, 2, 3, 4, 5, 6, 7>, - 7, - 1, - InMemoryDataOperationEnum::Set, - 1, - true>{ - c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2, - make_multi_index(0, - 0, - m_thread_data_on_block_idx[I1], - n_thread_data_on_block_idx[I1], - m_thread_data_on_block_idx[I2], - m_thread_data_on_block_idx[I3], - m_thread_data_on_block_idx[I4], - n_thread_data_on_block_idx[I2]), - ck::tensor_operation::element_wise::PassThrough{}}; - - // LDS to global - auto c_block_copy_lds_to_global = ThreadGroupTensorSliceTransfer_v6r1< - ThisThreadBlock, // index_t BlockSize, - CElementwiseOperation, // ElementwiseOperation, - CGlobalMemoryDataOperation, // DstInMemOp, - Sequence<1, - CShuffleMRepeatPerShuffle * MWave * MPerXDL, - 1, - CShuffleNRepeatPerShuffle * NWave * NPerXDL>, // BlockSliceLengths, - CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, - Sequence<0, 1, 2, 3>, // typename ThreadClusterArrangeOrder, - FloatC, // typename SrcData, - FloatC, // typename DstData, - decltype(c_block_desc_mblock_mperblock_nblock_nperblock), - decltype(c_grid_desc_mblock_mperblock_nblock_nperblock), - Sequence<0, 1, 2, 3>, // typename DimAccessOrder, - 3, // index_t VectorDim, - CBlockTransferScalarPerVector_NWaveNPerXDL, // index_t ScalarPerVector, - true, // bool ThreadTransferSrcResetCoordinateAfterRun, - false> // bool ThreadTransferDstResetCoordinateAfterRun - {c_block_desc_mblock_mperblock_nblock_nperblock, - make_multi_index(0, 0, 0, 0), - c_grid_desc_mblock_mperblock_nblock_nperblock, - make_multi_index(block_m_id, 0, block_n_id, 0), - c_element_op}; - - constexpr auto mxdlperwave_forward_step = - make_multi_index(0, CShuffleMRepeatPerShuffle * MWave * MPerXDL, 0, 0); - constexpr auto nxdlperwave_forward_step = - make_multi_index(0, 0, 0, CShuffleNRepeatPerShuffle * NWave * NPerXDL); - constexpr auto nxdlperwave_backward_step = - make_multi_index(0, 0, 0, -CShuffleNRepeatPerShuffle * NWave * NPerXDL); - - static_for<0, MRepeat, CShuffleMRepeatPerShuffle>{}([&](auto mxdlperwave_iter) { - constexpr auto mxdlperwave = mxdlperwave_iter; - - static_for<0, NRepeat, CShuffleNRepeatPerShuffle>{}([&](auto nxdlperwave_iter) { - constexpr bool nxdlperwave_forward_sweep = - (mxdlperwave % (2 * CShuffleMRepeatPerShuffle) == 0); - - constexpr index_t nxdlperwave_value = - nxdlperwave_forward_sweep - ? nxdlperwave_iter - : (NRepeat - nxdlperwave_iter - CShuffleNRepeatPerShuffle); - - constexpr auto nxdlperwave = Number{}; - - // make sure it's safe to do ds_write - block_sync_lds(); - - // VGPR to LDS - c_thread_copy_vgpr_to_lds.Run( - c_m0_n0_m1_n1_m2_m3_m4_n2_thread_desc, - make_tuple(mxdlperwave, nxdlperwave, I0, I0, I0, I0, I0, I0), - c_thread_buf, - c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2, - c_block_buf); - - // make sure it's safe to do ds_read - block_sync_lds(); - - // LDS to global - c_block_copy_lds_to_global.Run(c_block_desc_mblock_mperblock_nblock_nperblock, - c_block_buf, - c_grid_desc_mblock_mperblock_nblock_nperblock, - c_grid_buf); - - // move on nxdlperwave dimension - if constexpr(nxdlperwave_forward_sweep && - (nxdlperwave < NRepeat - CShuffleNRepeatPerShuffle)) - { - c_block_copy_lds_to_global.MoveDstSliceWindow( + constexpr index_t MWave = MPerBlock / (MRepeat * MPerXDL); + constexpr index_t NWave = NPerBlock / (NRepeat * NPerXDL); + + constexpr auto c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc = + blockwise_gemm.GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(); + + constexpr auto c_m0_n0_m1_n1_m2_m3_m4_n2_thread_desc = + blockwise_gemm.GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(); + + constexpr auto M0 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I0); + constexpr auto N0 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I1); + constexpr auto M1 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I2); + constexpr auto N1 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I3); + constexpr auto M2 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I4); + constexpr auto M3 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I5); + constexpr auto M4 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I6); + constexpr auto N2 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I7); + + constexpr auto c_block_desc_mblock_mperblock_nblock_nperblock = + GetCBlockDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(); + + auto c_block_buf = make_dynamic_buffer( + static_cast(p_shared_block), + c_block_desc_mblock_mperblock_nblock_nperblock.GetElementSpaceSize()); + + constexpr auto c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2 = transform_tensor_descriptor( + c_block_desc_mblock_mperblock_nblock_nperblock, + make_tuple(make_freeze_transform(I0), // freeze mblock + make_unmerge_transform( + make_tuple(CShuffleMRepeatPerShuffle, + M1, + M2, + M3, + M4)), // M1 = MWave, M2 * M3 * M4 = MPerXDL + make_freeze_transform(I0), // freeze nblock + make_unmerge_transform( + make_tuple(CShuffleNRepeatPerShuffle, + N1, + N2))), // M1 = MWave, M2 * M3 * M4 = MPerXDL + make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}), + make_tuple(Sequence<>{}, + Sequence<0, 2, 4, 5, 6>{}, + Sequence<>{}, + Sequence<1, 3, 7>{})); + + // calculate origin of thread output tensor on global memory + // blockwise GEMM c matrix starting index + const auto c_thread_mtx_on_block = + blockwise_gemm.CalculateCThreadOriginDataIndex(I0, I0, I0, I0); + + const index_t m_thread_data_on_block = c_thread_mtx_on_block[I0]; + const index_t n_thread_data_on_block = c_thread_mtx_on_block[I1]; + + const auto m_thread_data_on_block_to_m0_m1_m2_m3_m4_adaptor = + make_single_stage_tensor_adaptor( + make_tuple(make_merge_transform(make_tuple(M0, M1, M2, M3, M4))), + make_tuple(Sequence<0, 1, 2, 3, 4>{}), + make_tuple(Sequence<0>{})); + + const auto m_thread_data_on_block_idx = + m_thread_data_on_block_to_m0_m1_m2_m3_m4_adaptor.CalculateBottomIndex( + make_multi_index(m_thread_data_on_block)); + + const auto n_thread_data_on_block_to_n0_n1_n2_adaptor = + make_single_stage_tensor_adaptor( + make_tuple(make_merge_transform(make_tuple(N0, N1, N2))), + make_tuple(Sequence<0, 1, 2>{}), + make_tuple(Sequence<0>{})); + + const auto n_thread_data_on_block_idx = + n_thread_data_on_block_to_n0_n1_n2_adaptor.CalculateBottomIndex( + make_multi_index(n_thread_data_on_block)); + + // VGPR to LDS + auto c_thread_copy_vgpr_to_lds = ThreadwiseTensorSliceTransfer_v1r3< + FloatAcc, + FloatC, + decltype(c_m0_n0_m1_n1_m2_m3_m4_n2_thread_desc), + decltype(c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2), + ck::tensor_operation::element_wise::PassThrough, + Sequence, + Sequence<0, 1, 2, 3, 4, 5, 6, 7>, + 7, + 1, + InMemoryDataOperationEnum::Set, + 1, + true>{c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2, + make_multi_index(0, + 0, + m_thread_data_on_block_idx[I1], + n_thread_data_on_block_idx[I1], + m_thread_data_on_block_idx[I2], + m_thread_data_on_block_idx[I3], + m_thread_data_on_block_idx[I4], + n_thread_data_on_block_idx[I2]), + ck::tensor_operation::element_wise::PassThrough{}}; + + // LDS to global + auto c_block_copy_lds_to_global = ThreadGroupTensorSliceTransfer_v6r1< + ThisThreadBlock, // index_t BlockSize, + CElementwiseOperation, // ElementwiseOperation, + CGlobalMemoryDataOperation, // DstInMemOp, + Sequence<1, + CShuffleMRepeatPerShuffle * MWave * MPerXDL, + 1, + CShuffleNRepeatPerShuffle * NWave * NPerXDL>, // BlockSliceLengths, + CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, + Sequence<0, 1, 2, 3>, // typename ThreadClusterArrangeOrder, + FloatC, // typename SrcData, + FloatC, // typename DstData, + decltype(c_block_desc_mblock_mperblock_nblock_nperblock), + decltype(c_grid_desc_mblock_mperblock_nblock_nperblock), + Sequence<0, 1, 2, 3>, // typename DimAccessOrder, + 3, // index_t VectorDim, + CBlockTransferScalarPerVector_NWaveNPerXDL, // index_t ScalarPerVector, + true, // bool ThreadTransferSrcResetCoordinateAfterRun, + false> // bool ThreadTransferDstResetCoordinateAfterRun + {c_block_desc_mblock_mperblock_nblock_nperblock, + make_multi_index(0, 0, 0, 0), + c_grid_desc_mblock_mperblock_nblock_nperblock, + make_multi_index(block_m_id, 0, block_n_id, 0), + c_element_op}; + + constexpr auto mxdlperwave_forward_step = + make_multi_index(0, CShuffleMRepeatPerShuffle * MWave * MPerXDL, 0, 0); + constexpr auto nxdlperwave_forward_step = + make_multi_index(0, 0, 0, CShuffleNRepeatPerShuffle * NWave * NPerXDL); + constexpr auto nxdlperwave_backward_step = + make_multi_index(0, 0, 0, -CShuffleNRepeatPerShuffle * NWave * NPerXDL); + + static_for<0, MRepeat, CShuffleMRepeatPerShuffle>{}([&](auto mxdlperwave_iter) { + constexpr auto mxdlperwave = mxdlperwave_iter; + + static_for<0, NRepeat, CShuffleNRepeatPerShuffle>{}([&](auto nxdlperwave_iter) { + constexpr bool nxdlperwave_forward_sweep = + (mxdlperwave % (2 * CShuffleMRepeatPerShuffle) == 0); + + constexpr index_t nxdlperwave_value = + nxdlperwave_forward_sweep + ? nxdlperwave_iter + : (NRepeat - nxdlperwave_iter - CShuffleNRepeatPerShuffle); + + constexpr auto nxdlperwave = Number{}; + + // make sure it's safe to do ds_write + block_sync_lds(); + + // VGPR to LDS + c_thread_copy_vgpr_to_lds.Run( + c_m0_n0_m1_n1_m2_m3_m4_n2_thread_desc, + make_tuple(mxdlperwave, nxdlperwave, I0, I0, I0, I0, I0, I0), + c_thread_buf, + c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2, + c_block_buf); + + // make sure it's safe to do ds_read + block_sync_lds(); + + // LDS to global + c_block_copy_lds_to_global.Run( + c_block_desc_mblock_mperblock_nblock_nperblock, + c_block_buf, c_grid_desc_mblock_mperblock_nblock_nperblock, - nxdlperwave_forward_step); - } - else if constexpr((!nxdlperwave_forward_sweep) && (nxdlperwave > 0)) + c_grid_buf); + + // move on nxdlperwave dimension + if constexpr(nxdlperwave_forward_sweep && + (nxdlperwave < NRepeat - CShuffleNRepeatPerShuffle)) + { + c_block_copy_lds_to_global.MoveDstSliceWindow( + c_grid_desc_mblock_mperblock_nblock_nperblock, + nxdlperwave_forward_step); + } + else if constexpr((!nxdlperwave_forward_sweep) && (nxdlperwave > 0)) + { + c_block_copy_lds_to_global.MoveDstSliceWindow( + c_grid_desc_mblock_mperblock_nblock_nperblock, + nxdlperwave_backward_step); + } + }); + + // move on mxdlperwave dimension + if constexpr(mxdlperwave < MRepeat - CShuffleMRepeatPerShuffle) { c_block_copy_lds_to_global.MoveDstSliceWindow( c_grid_desc_mblock_mperblock_nblock_nperblock, - nxdlperwave_backward_step); + mxdlperwave_forward_step); } }); - - // move on mxdlperwave dimension - if constexpr(mxdlperwave < MRepeat - CShuffleMRepeatPerShuffle) - { - c_block_copy_lds_to_global.MoveDstSliceWindow( - c_grid_desc_mblock_mperblock_nblock_nperblock, mxdlperwave_forward_step); - } - }); + } } } diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_bwd_data.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_bwd_gamma_beta.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_bwd_gamma_beta.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp b/include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1_dequant.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1_dequant.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp b/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp b/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp b/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp b/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm_arraybase.hpp b/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm_arraybase.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp b/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp b/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_address_space.hpp b/include/ck/utility/amd_address_space.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_buffer_addressing.hpp b/include/ck/utility/amd_buffer_addressing.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_gemm_dpp.hpp b/include/ck/utility/amd_gemm_dpp.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_inline_asm.hpp b/include/ck/utility/amd_inline_asm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_lds.hpp b/include/ck/utility/amd_lds.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_wave_read_first_lane.hpp b/include/ck/utility/amd_wave_read_first_lane.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_wmma.hpp b/include/ck/utility/amd_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_xdlops.hpp b/include/ck/utility/amd_xdlops.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/array.hpp b/include/ck/utility/array.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/array_multi_index.hpp b/include/ck/utility/array_multi_index.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/c_style_pointer_cast.hpp b/include/ck/utility/c_style_pointer_cast.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/common_header.hpp b/include/ck/utility/common_header.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/container_element_picker.hpp b/include/ck/utility/container_element_picker.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/container_helper.hpp b/include/ck/utility/container_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/data_type.hpp b/include/ck/utility/data_type.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/debug.hpp b/include/ck/utility/debug.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/dynamic_buffer.hpp b/include/ck/utility/dynamic_buffer.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/enable_if.hpp b/include/ck/utility/enable_if.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/f8_utils.hpp b/include/ck/utility/f8_utils.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/functional.hpp b/include/ck/utility/functional.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/functional2.hpp b/include/ck/utility/functional2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/functional3.hpp b/include/ck/utility/functional3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/functional4.hpp b/include/ck/utility/functional4.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/generic_memory_space_atomic.hpp b/include/ck/utility/generic_memory_space_atomic.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/get_id.hpp b/include/ck/utility/get_id.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/get_shift.hpp b/include/ck/utility/get_shift.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/ignore.hpp b/include/ck/utility/ignore.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/inner_product.hpp b/include/ck/utility/inner_product.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/inner_product_dpp8.hpp b/include/ck/utility/inner_product_dpp8.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/integral_constant.hpp b/include/ck/utility/integral_constant.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/is_detected.hpp b/include/ck/utility/is_detected.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/is_known_at_compile_time.hpp b/include/ck/utility/is_known_at_compile_time.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/loop_scheduler.hpp b/include/ck/utility/loop_scheduler.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/magic_division.hpp b/include/ck/utility/magic_division.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/math.hpp b/include/ck/utility/math.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/math_v2.hpp b/include/ck/utility/math_v2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/multi_index.hpp b/include/ck/utility/multi_index.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/number.hpp b/include/ck/utility/number.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/random_gen.hpp b/include/ck/utility/random_gen.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/reduction_common.hpp b/include/ck/utility/reduction_common.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/reduction_enums.hpp b/include/ck/utility/reduction_enums.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/reduction_functions_accumulate.hpp b/include/ck/utility/reduction_functions_accumulate.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/reduction_operator.hpp b/include/ck/utility/reduction_operator.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/sequence.hpp b/include/ck/utility/sequence.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/sequence_helper.hpp b/include/ck/utility/sequence_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/span.hpp b/include/ck/utility/span.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/static_buffer.hpp b/include/ck/utility/static_buffer.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/statically_indexed_array.hpp b/include/ck/utility/statically_indexed_array.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/statically_indexed_array_multi_index.hpp b/include/ck/utility/statically_indexed_array_multi_index.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/synchronization.hpp b/include/ck/utility/synchronization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/thread_group.hpp b/include/ck/utility/thread_group.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/transpose_vectors.hpp b/include/ck/utility/transpose_vectors.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/tuple.hpp b/include/ck/utility/tuple.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/tuple_helper.hpp b/include/ck/utility/tuple_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/type.hpp b/include/ck/utility/type.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/type_convert.hpp b/include/ck/utility/type_convert.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/workgroup_barrier.hpp b/include/ck/utility/workgroup_barrier.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/workgroup_synchronization.hpp b/include/ck/utility/workgroup_synchronization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/version.h.in b/include/ck/version.h.in old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/layout.hpp b/include/ck/wrapper/layout.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/operations/copy.hpp b/include/ck/wrapper/operations/copy.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/operations/gemm.hpp b/include/ck/wrapper/operations/gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/tensor.hpp b/include/ck/wrapper/tensor.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/traits/blockwise_gemm_xdl_traits.hpp b/include/ck/wrapper/traits/blockwise_gemm_xdl_traits.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/utils/kernel_utils.hpp b/include/ck/wrapper/utils/kernel_utils.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/utils/layout_utils.hpp b/include/ck/wrapper/utils/layout_utils.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/utils/tensor_partition.hpp b/include/ck/wrapper/utils/tensor_partition.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/utils/tensor_utils.hpp b/include/ck/wrapper/utils/tensor_utils.hpp old mode 100644 new mode 100755 diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_fpAintB_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_fpAintB_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp b/library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp b/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp b/library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/contraction/device_contraction_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/contraction/device_contraction_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp b/library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_silu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_silu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_f16_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_f16_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_i8_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_i8_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_scale_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_scale_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_bilinear_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_bilinear_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scale_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scale_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data_bilinear.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data_scale.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data_scale.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bilinear.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scale.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scale.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_scaleadd_relu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_scaleadd_relu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/groupnorm_bwd_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/groupnorm_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/groupnorm_bwd_gamma_beta.hpp b/library/include/ck/library/tensor_operation_instance/gpu/groupnorm_bwd_gamma_beta.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/layernorm_bwd_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/layernorm_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/layernorm_bwd_gamma_beta.hpp b/library/include/ck/library/tensor_operation_instance/gpu/layernorm_bwd_gamma_beta.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/normalization_fwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/normalization_fwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/normalization_fwd_swish.hpp b/library/include/ck/library/tensor_operation_instance/gpu/normalization_fwd_swish.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/permute_scale.hpp b/library/include/ck/library/tensor_operation_instance/gpu/permute_scale.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/permute_scale/device_permute_scale_instances.hpp b/library/include/ck/library/tensor_operation_instance/gpu/permute_scale/device_permute_scale_instances.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/transpose/device_transpose_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/transpose/device_transpose_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/transpose_3d.hpp b/library/include/ck/library/tensor_operation_instance/gpu/transpose_3d.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/algorithm.hpp b/library/include/ck/library/utility/algorithm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/check_err.hpp b/library/include/ck/library/utility/check_err.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/conv_common.hpp b/library/include/ck/library/utility/conv_common.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp b/library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/convolution_parameter.hpp b/library/include/ck/library/utility/convolution_parameter.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/device_memory.hpp b/library/include/ck/library/utility/device_memory.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/fill.hpp b/library/include/ck/library/utility/fill.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/host_common_util.hpp b/library/include/ck/library/utility/host_common_util.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/host_gemm.hpp b/library/include/ck/library/utility/host_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/host_tensor.hpp b/library/include/ck/library/utility/host_tensor.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/host_tensor_generator.hpp b/library/include/ck/library/utility/host_tensor_generator.hpp old mode 100644 new mode 100755 index 6fd7ed8aa854131badcf845b4ad5d8a23d158ebc..b55cfb65581bbaf2dbfdf362451a58652d72c36e --- a/library/include/ck/library/utility/host_tensor_generator.hpp +++ b/library/include/ck/library/utility/host_tensor_generator.hpp @@ -142,6 +142,45 @@ struct GeneratorTensor_3 } }; +// template +// struct GeneratorTensor_3_control_entropy +// { + +// // constexpr static int fp32_exponent_size = 8; +// // constexpr static int fp32_mantissa_size = 23; +// constexpr static int fp16_exponent_size = 5; +// constexpr static int fp16_mantissa_size = 10; + +// mutable std::mt19937 gen{std::random_device{}()}; +// mutable std::uniform_int_distribution dis{0, 0xFFFF}; +// float MAX_FP32_NUM = 0xFFFFFFFF; +// float MAX_FP16_NUM = 0xFFFF; + +// template +// T operator()(Is...) const +// { +// uint16_t bits = dis(gen); // Generate 32 random bits + +// // Combine the bits into a floating-point number according to IEEE 754 format +// uint16_t sign = bits >> 15; // Get the sign bit +// uint16_t exponent = (bits >> 10) & 0x1F; // Get the exponent bits +// uint16_t mantissa = bits & 0x3FF; // Get the mantissa bits + +// if(exponent == 0x1F || exponent == 0) +// { +// // Avoid NaN and denormalized numbers +// return static_cast(0.0); +// } + +// // Shift the exponent to the correct position and set the sign bit + +// int16_t result = (sign << 15) | ((exponent - 15 + 0x1F) << 10) | +// (mantissa >> (10 - fp16_mantissa_size)); +// float v= static_cast(*reinterpret_cast(std::bitset<16>(result).to_string().c_str())) / MAX_FP16_NUM; +// printf("%0f,\t", v / MAX_FP16_NUM); +// return static_cast(vector_type_maker / MAX_FP16_NUM); +// } +// }; template <> struct GeneratorTensor_3 { diff --git a/library/include/ck/library/utility/iterator.hpp b/library/include/ck/library/utility/iterator.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/literals.hpp b/library/include/ck/library/utility/literals.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/numeric.hpp b/library/include/ck/library/utility/numeric.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/ranges.hpp b/library/include/ck/library/utility/ranges.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gndhwc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gndhwc_3d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gnhwc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gnhwc_2d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gnwc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gnwc_1d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_ndhwgc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_ndhwgc_3d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwgc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwgc_2d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nwgc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nwgc_1d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp b/library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f8_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f8_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add/device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add/device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add/device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add/device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_silu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_silu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_silu/device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_silu/device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_silu/device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_silu/device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_interwave_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_interwave_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_kpb128_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_kpb128_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_interwave_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_interwave_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_interwave_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_interwave_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f8_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f8_f16_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gndhwc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gndhwc_3d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnhwc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnhwc_2d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnwc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnwc_1d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_ndhwgc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_ndhwgc_3d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwgc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwgc_2d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nwgc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nwgc_1d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/normalization_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_groupnorm_bwd_data_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_groupnorm_bwd_data_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_layernorm2d_bwd_data_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_layernorm2d_bwd_data_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_layernorm2d_bwd_data_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_layernorm2d_bwd_data_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_data/normalization_bwd_data_instance_common.hpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_data/normalization_bwd_data_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_groupnorm_bwd_gamma_beta_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_groupnorm_bwd_gamma_beta_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_layernorm2d_bwd_gamma_beta_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_layernorm2d_bwd_gamma_beta_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_layernorm2d_bwd_gamma_beta_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_layernorm2d_bwd_gamma_beta_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/normalization_bwd_gamma_beta_instance_common.hpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/normalization_bwd_gamma_beta_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/normalization_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f16_f32_f32_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f16_f32_f32_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm2d_fwd_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm2d_fwd_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm2d_fwd_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm2d_fwd_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm4d_fwd_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm4d_fwd_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm4d_fwd_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm4d_fwd_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/normalization_fwd_instance_common.hpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/normalization_fwd_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/permute_scale/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_1d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_1d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_2d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_2d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_3d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_3d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_4d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_4d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_5d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_5d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_6d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_6d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/transpose/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/transpose/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/transpose/device_transpose_instances_3d.cpp b/library/src/tensor_operation_instance/gpu/transpose/device_transpose_instances_3d.cpp old mode 100644 new mode 100755 diff --git a/library/src/utility/CMakeLists.txt b/library/src/utility/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/utility/convolution_parameter.cpp b/library/src/utility/convolution_parameter.cpp old mode 100644 new mode 100755 diff --git a/library/src/utility/device_memory.cpp b/library/src/utility/device_memory.cpp old mode 100644 new mode 100755 diff --git a/library/src/utility/host_tensor.cpp b/library/src/utility/host_tensor.cpp old mode 100644 new mode 100755 diff --git a/profiler/CMakeLists.txt b/profiler/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/profiler/README.md b/profiler/README.md old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/data_type_enum.hpp b/profiler/include/profiler/data_type_enum.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp b/profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp b/profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp b/profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp b/profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp b/profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batchnorm_backward_impl.hpp b/profiler/include/profiler/profile_batchnorm_backward_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batchnorm_forward_impl.hpp b/profiler/include/profiler/profile_batchnorm_forward_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batchnorm_infer_impl.hpp b/profiler/include/profiler/profile_batchnorm_infer_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_contraction_impl.hpp b/profiler/include/profiler/profile_contraction_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_contraction_utils.hpp b/profiler/include/profiler/profile_contraction_utils.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_conv_bwd_data_impl.hpp b/profiler/include/profiler/profile_conv_bwd_data_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp b/profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp b/profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_conv_fwd_impl.hpp b/profiler/include/profiler/profile_conv_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp b/profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_elementwise_layernorm_impl.hpp b/profiler/include/profiler/profile_elementwise_layernorm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_impl.hpp b/profiler/include/profiler/profile_gemm_add_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_multiply_impl.hpp b/profiler/include/profiler/profile_gemm_add_multiply_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp b/profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_relu_impl.hpp b/profiler/include/profiler/profile_gemm_add_relu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_silu_impl.hpp b/profiler/include/profiler/profile_gemm_add_silu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp b/profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_bilinear_impl.hpp b/profiler/include/profiler/profile_gemm_bilinear_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_fastgelu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_impl.hpp b/profiler/include/profiler/profile_gemm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_multiply_add_impl.hpp b/profiler/include/profiler/profile_gemm_multiply_add_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_reduce_impl.hpp b/profiler/include/profiler/profile_gemm_reduce_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_splitk_impl.hpp b/profiler/include/profiler/profile_gemm_splitk_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_streamk_impl.hpp b/profiler/include/profiler/profile_gemm_streamk_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp b/profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp b/profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp b/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_gemm_fixed_nk_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_fixed_nk_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_gemm_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_groupnorm_bwd_data_impl.hpp b/profiler/include/profiler/profile_groupnorm_bwd_data_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_groupnorm_bwd_gamma_beta_impl.hpp b/profiler/include/profiler/profile_groupnorm_bwd_gamma_beta_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_groupnorm_fwd_impl.hpp b/profiler/include/profiler/profile_groupnorm_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_layernorm_bwd_data_impl.hpp b/profiler/include/profiler/profile_layernorm_bwd_data_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_layernorm_bwd_gamma_beta_impl.hpp b/profiler/include/profiler/profile_layernorm_bwd_gamma_beta_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_layernorm_fwd_impl.hpp b/profiler/include/profiler/profile_layernorm_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp b/profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_permute_scale_impl.hpp b/profiler/include/profiler/profile_permute_scale_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_pool3d_fwd_impl.hpp b/profiler/include/profiler/profile_pool3d_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_reduce_impl.hpp b/profiler/include/profiler/profile_reduce_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_softmax_impl.hpp b/profiler/include/profiler/profile_softmax_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_transpose_impl.hpp b/profiler/include/profiler/profile_transpose_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/src/CMakeLists.txt b/profiler/src/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/profiler/src/profile_avg_pool3d_bwd.cpp b/profiler/src/profile_avg_pool3d_bwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batched_gemm.cpp b/profiler/src/profile_batched_gemm.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp b/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batched_gemm_gemm.cpp b/profiler/src/profile_batched_gemm_gemm.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batched_gemm_multi_d.cpp b/profiler/src/profile_batched_gemm_multi_d.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batched_gemm_reduce.cpp b/profiler/src/profile_batched_gemm_reduce.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batchnorm_bwd.cpp b/profiler/src/profile_batchnorm_bwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batchnorm_fwd.cpp b/profiler/src/profile_batchnorm_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batchnorm_infer.cpp b/profiler/src/profile_batchnorm_infer.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_contraction_bilinear.cpp b/profiler/src/profile_contraction_bilinear.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_contraction_scale.cpp b/profiler/src/profile_contraction_scale.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_conv_bwd_data.cpp b/profiler/src/profile_conv_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_conv_fwd.cpp b/profiler/src/profile_conv_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_conv_fwd_bias_relu.cpp b/profiler/src/profile_conv_fwd_bias_relu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_conv_fwd_bias_relu_add.cpp b/profiler/src/profile_conv_fwd_bias_relu_add.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_conv_tensor_rearrange.cpp b/profiler/src/profile_conv_tensor_rearrange.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm.cpp b/profiler/src/profile_gemm.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add.cpp b/profiler/src/profile_gemm_add.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_add_fastgelu.cpp b/profiler/src/profile_gemm_add_add_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_fastgelu.cpp b/profiler/src/profile_gemm_add_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_multiply.cpp b/profiler/src/profile_gemm_add_multiply.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_relu.cpp b/profiler/src/profile_gemm_add_relu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_relu_add_layernorm.cpp b/profiler/src/profile_gemm_add_relu_add_layernorm.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_silu.cpp b/profiler/src/profile_gemm_add_silu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_bias_add_reduce.cpp b/profiler/src/profile_gemm_bias_add_reduce.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_bilinear.cpp b/profiler/src/profile_gemm_bilinear.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_fastgelu.cpp b/profiler/src/profile_gemm_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_multiply_add.cpp b/profiler/src/profile_gemm_multiply_add.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_reduce.cpp b/profiler/src/profile_gemm_reduce.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_splitk.cpp b/profiler/src/profile_gemm_splitk.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_streamk.cpp b/profiler/src/profile_gemm_streamk.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_conv_bwd_data.cpp b/profiler/src/profile_grouped_conv_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_conv_bwd_weight.cpp b/profiler/src/profile_grouped_conv_bwd_weight.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_conv_fwd.cpp b/profiler/src/profile_grouped_conv_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_gemm.cpp b/profiler/src/profile_grouped_gemm.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_gemm_fastgelu.cpp b/profiler/src/profile_grouped_gemm_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_gemm_fixed_nk.cpp b/profiler/src/profile_grouped_gemm_fixed_nk.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_groupnorm_bwd_data.cpp b/profiler/src/profile_groupnorm_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_groupnorm_bwd_gamma_beta.cpp b/profiler/src/profile_groupnorm_bwd_gamma_beta.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_groupnorm_fwd.cpp b/profiler/src/profile_groupnorm_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_layernorm_bwd_data.cpp b/profiler/src/profile_layernorm_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_layernorm_bwd_gamma_beta.cpp b/profiler/src/profile_layernorm_bwd_gamma_beta.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_layernorm_fwd.cpp b/profiler/src/profile_layernorm_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_max_pool3d_bwd.cpp b/profiler/src/profile_max_pool3d_bwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_max_pool3d_fwd.cpp b/profiler/src/profile_max_pool3d_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_permute_scale.cpp b/profiler/src/profile_permute_scale.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_reduce.cpp b/profiler/src/profile_reduce.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_softmax.cpp b/profiler/src/profile_softmax.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_transpose.cpp b/profiler/src/profile_transpose.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profiler.cpp b/profiler/src/profiler.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profiler_operation_registry.hpp b/profiler/src/profiler_operation_registry.hpp old mode 100644 new mode 100755 diff --git a/rbuild.ini b/rbuild.ini old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 diff --git a/script/cmake-ck-dev.sh b/script/cmake-ck-dev.sh index 51d6f7a30c191dbcae9a6ec999ca4d20aa303cda..13f5f086719227c328b4b19c7548327c4f1ac4aa 100755 --- a/script/cmake-ck-dev.sh +++ b/script/cmake-ck-dev.sh @@ -11,7 +11,7 @@ cmake -D CMAKE_CXX_FLAGS="-std=c++17 -O3 -ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker" \ -D CMAKE_BUILD_TYPE=Release \ -D BUILD_DEV=ON \ --D GPU_TARGETS="gfx908;gfx90a;gfx940" \ +-D GPU_TARGETS="gfx942" \ -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \ -D USE_BITINT_EXTENSION_INT4=OFF \ ${MY_PROJECT_SOURCE} diff --git a/script/hip_fatbin_insert b/script/hip_fatbin_insert old mode 100644 new mode 100755 diff --git a/script/process_perf_data.py b/script/process_perf_data.py old mode 100644 new mode 100755 diff --git a/script/redis-cli.conf b/script/redis-cli.conf old mode 100644 new mode 100755 diff --git a/script/test_convnd_fwd.sh b/script/test_convnd_fwd.sh old mode 100644 new mode 100755 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm/CMakeLists.txt b/test/batched_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm/test_batched_gemm.cpp b/test/batched_gemm/test_batched_gemm.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_gemm/CMakeLists.txt b/test/batched_gemm_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp b/test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp b/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_multi_d/CMakeLists.txt b/test/batched_gemm_multi_d/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp b/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_reduce/CMakeLists.txt b/test/batched_gemm_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp b/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm/CMakeLists.txt b/test/batched_gemm_softmax_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp b/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp b/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt b/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp old mode 100644 new mode 100755 diff --git a/test/batchnorm/CMakeLists.txt b/test/batchnorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batchnorm/batchnorm_bwd_rank_4.cpp b/test/batchnorm/batchnorm_bwd_rank_4.cpp old mode 100644 new mode 100755 diff --git a/test/batchnorm/batchnorm_fwd_rank_4.cpp b/test/batchnorm/batchnorm_fwd_rank_4.cpp old mode 100644 new mode 100755 diff --git a/test/batchnorm/batchnorm_infer_rank_4.cpp b/test/batchnorm/batchnorm_infer_rank_4.cpp old mode 100644 new mode 100755 diff --git a/test/block_swizzle_test/block_swizzle_test.cpp b/test/block_swizzle_test/block_swizzle_test.cpp old mode 100644 new mode 100755 diff --git a/test/block_swizzle_test/rebuild.sh b/test/block_swizzle_test/rebuild.sh old mode 100644 new mode 100755 diff --git a/test/block_swizzle_test/simple_args.h b/test/block_swizzle_test/simple_args.h old mode 100644 new mode 100755 diff --git a/test/block_to_ctile_map/CMakeLists.txt b/test/block_to_ctile_map/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/block_to_ctile_map/test_block_to_ctile_map.cpp b/test/block_to_ctile_map/test_block_to_ctile_map.cpp old mode 100644 new mode 100755 diff --git a/test/contraction/CMakeLists.txt b/test/contraction/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/contraction/test_contraction.cpp b/test/contraction/test_contraction.cpp old mode 100644 new mode 100755 diff --git a/test/contraction/test_contraction_interface.cpp b/test/contraction/test_contraction_interface.cpp old mode 100644 new mode 100755 diff --git a/test/conv_tensor_rearrange/CMakeLists.txt b/test/conv_tensor_rearrange/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp b/test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp old mode 100644 new mode 100755 diff --git a/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp b/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp old mode 100644 new mode 100755 diff --git a/test/conv_util/CMakeLists.txt b/test/conv_util/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/conv_util/conv_util.cpp b/test/conv_util/conv_util.cpp old mode 100644 new mode 100755 diff --git a/test/convnd_bwd_data/CMakeLists.txt b/test/convnd_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/convnd_bwd_data/convnd_bwd_data.cpp b/test/convnd_bwd_data/convnd_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/test/convnd_fwd/CMakeLists.txt b/test/convnd_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/convnd_fwd/convnd_fwd.cpp b/test/convnd_fwd/convnd_fwd.cpp old mode 100644 new mode 100755 diff --git a/test/data_type/CMakeLists.txt b/test/data_type/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/data_type/test_bf8.cpp b/test/data_type/test_bf8.cpp old mode 100644 new mode 100755 diff --git a/test/data_type/test_fp8.cpp b/test/data_type/test_fp8.cpp old mode 100644 new mode 100755 diff --git a/test/data_type/test_int4.cpp b/test/data_type/test_int4.cpp old mode 100644 new mode 100755 diff --git a/test/data_type/type_convert_const.cpp b/test/data_type/type_convert_const.cpp old mode 100644 new mode 100755 diff --git a/test/elementwise_normalization/CMakeLists.txt b/test/elementwise_normalization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp b/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/CMakeLists.txt b/test/gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_bf16.cpp b/test/gemm/gemm_bf16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_fp16.cpp b/test/gemm/gemm_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_fp32.cpp b/test/gemm/gemm_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_fp64.cpp b/test/gemm/gemm_fp64.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_int8.cpp b/test/gemm/gemm_int8.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_standalone_xdl_fp16.cpp b/test/gemm/gemm_standalone_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_util.hpp b/test/gemm/gemm_util.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_nn_instance.cpp b/test/gemm/instance/gemm_f16_nn_instance.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_nn_instance.hpp b/test/gemm/instance/gemm_f16_nn_instance.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_nt_instance.cpp b/test/gemm/instance/gemm_f16_nt_instance.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_nt_instance.hpp b/test/gemm/instance/gemm_f16_nt_instance.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_tn_instance.cpp b/test/gemm/instance/gemm_f16_tn_instance.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_tn_instance.hpp b/test/gemm/instance/gemm_f16_tn_instance.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_tt_instance.cpp b/test/gemm/instance/gemm_f16_tt_instance.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_tt_instance.hpp b/test/gemm/instance/gemm_f16_tt_instance.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp b/test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp b/test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/run_gemm_test.inc b/test/gemm/run_gemm_test.inc old mode 100644 new mode 100755 diff --git a/test/gemm_add/CMakeLists.txt b/test/gemm_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/gemm_add/test_gemm_add.hpp b/test/gemm_add/test_gemm_add.hpp old mode 100644 new mode 100755 diff --git a/test/gemm_add/test_gemm_add_fastgelu.cpp b/test/gemm_add/test_gemm_add_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_add/test_gemm_add_relu.cpp b/test/gemm_add/test_gemm_add_relu.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_add/test_gemm_add_silu.cpp b/test/gemm_add/test_gemm_add_silu.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_layernorm/CMakeLists.txt b/test/gemm_layernorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp b/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_reduce/CMakeLists.txt b/test/gemm_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/gemm_reduce/gemm_reduce_fp16.cpp b/test/gemm_reduce/gemm_reduce_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_split_k/CMakeLists.txt b/test/gemm_split_k/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/gemm_split_k/test_gemm_splitk.cpp b/test/gemm_split_k/test_gemm_splitk.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_split_k/test_gemm_splitk_ut_cases.inc b/test/gemm_split_k/test_gemm_splitk_ut_cases.inc old mode 100644 new mode 100755 diff --git a/test/gemm_split_k/test_gemm_splitk_util.hpp b/test/gemm_split_k/test_gemm_splitk_util.hpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_data/CMakeLists.txt b/test/grouped_convnd_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_weight/CMakeLists.txt b/test/grouped_convnd_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_fwd/CMakeLists.txt b/test/grouped_convnd_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_fwd/test_grouped_convnd_fwd_multi_ab_interface.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd_multi_ab_interface.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_fwd/test_grouped_convnd_fwd_multi_d_interface_compatibility.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd_multi_d_interface_compatibility.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_gemm/CMakeLists.txt b/test/grouped_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/grouped_gemm/test_grouped_gemm_interface.cpp b/test/grouped_gemm/test_grouped_gemm_interface.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_gemm/test_grouped_gemm_splitk.cpp b/test/grouped_gemm/test_grouped_gemm_splitk.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_gemm/test_grouped_gemm_ut_cases.inc b/test/grouped_gemm/test_grouped_gemm_ut_cases.inc old mode 100644 new mode 100755 diff --git a/test/grouped_gemm/test_grouped_gemm_util.hpp b/test/grouped_gemm/test_grouped_gemm_util.hpp old mode 100644 new mode 100755 diff --git a/test/magic_number_division/CMakeLists.txt b/test/magic_number_division/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/magic_number_division/magic_number_division.cpp b/test/magic_number_division/magic_number_division.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_data/CMakeLists.txt b/test/normalization_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_data/test_groupnorm_bwd_data_fp32.cpp b/test/normalization_bwd_data/test_groupnorm_bwd_data_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_data/test_layernorm2d_bwd_data_fp32.cpp b/test/normalization_bwd_data/test_layernorm2d_bwd_data_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_gamma_beta/CMakeLists.txt b/test/normalization_bwd_gamma_beta/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_gamma_beta/test_groupnorm_bwd_gamma_beta_fp32.cpp b/test/normalization_bwd_gamma_beta/test_groupnorm_bwd_gamma_beta_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_gamma_beta/test_layernorm2d_bwd_gamma_beta_fp32.cpp b/test/normalization_bwd_gamma_beta/test_layernorm2d_bwd_gamma_beta_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/CMakeLists.txt b/test/normalization_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/test_groupnorm_fwd_fp16.cpp b/test/normalization_fwd/test_groupnorm_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/test_groupnorm_fwd_fp32.cpp b/test/normalization_fwd/test_groupnorm_fwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/test_layernorm2d_fwd_fp16.cpp b/test/normalization_fwd/test_layernorm2d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/test_layernorm2d_fwd_fp32.cpp b/test/normalization_fwd/test_layernorm2d_fwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/test_layernorm4d_fwd_fp16.cpp b/test/normalization_fwd/test_layernorm4d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/permute_scale/CMakeLists.txt b/test/permute_scale/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/permute_scale/test_permute_scale.cpp b/test/permute_scale/test_permute_scale.cpp old mode 100644 new mode 100755 diff --git a/test/pool/CMakeLists.txt b/test/pool/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/pool/test_avg_pool3d_bwd.cpp b/test/pool/test_avg_pool3d_bwd.cpp old mode 100644 new mode 100755 diff --git a/test/pool/test_avg_pool3d_fwd.cpp b/test/pool/test_avg_pool3d_fwd.cpp old mode 100644 new mode 100755 diff --git a/test/pool/test_max_pool3d_bwd.cpp b/test/pool/test_max_pool3d_bwd.cpp old mode 100644 new mode 100755 diff --git a/test/pool/test_max_pool3d_fwd.cpp b/test/pool/test_max_pool3d_fwd.cpp old mode 100644 new mode 100755 diff --git a/test/pool/test_pool_fwd_common.hpp b/test/pool/test_pool_fwd_common.hpp old mode 100644 new mode 100755 diff --git a/test/reduce/CMakeLists.txt b/test/reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/reduce/reduce_no_index.cpp b/test/reduce/reduce_no_index.cpp old mode 100644 new mode 100755 diff --git a/test/reduce/reduce_with_index.cpp b/test/reduce/reduce_with_index.cpp old mode 100644 new mode 100755 diff --git a/test/reference_conv_fwd/CMakeLists.txt b/test/reference_conv_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/reference_conv_fwd/reference_conv_fwd.cpp b/test/reference_conv_fwd/reference_conv_fwd.cpp old mode 100644 new mode 100755 diff --git a/test/softmax/CMakeLists.txt b/test/softmax/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/softmax/test_softmax_interface.cpp b/test/softmax/test_softmax_interface.cpp old mode 100644 new mode 100755 diff --git a/test/softmax/test_softmax_rank3.cpp b/test/softmax/test_softmax_rank3.cpp old mode 100644 new mode 100755 diff --git a/test/softmax/test_softmax_rank4.cpp b/test/softmax/test_softmax_rank4.cpp old mode 100644 new mode 100755 diff --git a/test/softmax/test_softmax_ut_cases.inc b/test/softmax/test_softmax_ut_cases.inc old mode 100644 new mode 100755 diff --git a/test/softmax/test_softmax_util.hpp b/test/softmax/test_softmax_util.hpp old mode 100644 new mode 100755 diff --git a/test/space_filling_curve/CMakeLists.txt b/test/space_filling_curve/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/space_filling_curve/space_filling_curve.cpp b/test/space_filling_curve/space_filling_curve.cpp old mode 100644 new mode 100755 diff --git a/test/transpose/CMakeLists.txt b/test/transpose/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/transpose/test_transpose.cpp b/test/transpose/test_transpose.cpp old mode 100644 new mode 100755 diff --git a/test/wmma_op/CMakeLists.txt b/test/wmma_op/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/wmma_op/wmma_op.cpp b/test/wmma_op/wmma_op.cpp old mode 100644 new mode 100755 diff --git a/test/wmma_op/wmma_op_util.hpp b/test/wmma_op/wmma_op_util.hpp old mode 100644 new mode 100755 diff --git a/test/wrapper/CMakeLists.txt b/test/wrapper/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/wrapper/test_wrapper_copy.cpp b/test/wrapper/test_wrapper_copy.cpp old mode 100644 new mode 100755 diff --git a/test/wrapper/test_wrapper_gemm.cpp b/test/wrapper/test_wrapper_gemm.cpp old mode 100644 new mode 100755 diff --git a/test/wrapper/test_wrapper_layout.cpp b/test/wrapper/test_wrapper_layout.cpp old mode 100644 new mode 100755 diff --git a/test/wrapper/test_wrapper_partition.cpp b/test/wrapper/test_wrapper_partition.cpp old mode 100644 new mode 100755 diff --git a/test/wrapper/test_wrapper_tensor.cpp b/test/wrapper/test_wrapper_tensor.cpp old mode 100644 new mode 100755