// SPDX-License-Identifier: MIT // Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved. #pragma once #include "ck_tile/host/ops/add_rmsnorm2d_rdquant/kernel/add_rmsnorm2d_rdquant_fwd_kernel.hpp" #include "ck_tile/host/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_default_policy.hpp" #include "ck_tile/host/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_one_pass.hpp" #include "ck_tile/host/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_problem.hpp" #include "ck_tile/host/ops/add_rmsnorm2d_rdquant/pipeline/add_rmsnorm2d_rdquant_fwd_pipeline_three_pass.hpp" #include "ck_tile/host/ops/add_rmsnorm2d_rdquant.hpp" #include "ck_tile/host/ops/elementwise/unary_element_wise_operation.hpp" #include "ck_tile/host/ops/elementwise.hpp" #include "ck_tile/host/ops/flatmm/block/flatmm_32x512x128_1x4x1_16x16x32.hpp" #include "ck_tile/host/ops/flatmm/block/flatmm_sn_32x128x512_1x4x1_16x16x32.hpp" #include "ck_tile/host/ops/flatmm/block/flatmm_sn_32x128x512_1x4x1_16x16x32_itl.hpp" #include "ck_tile/host/ops/flatmm/block/flatmm_uk_config.hpp" #include "ck_tile/host/ops/flatmm.hpp" #include "ck_tile/host/ops/fused_moe/kernel/fused_moegemm_kernel.hpp" #include "ck_tile/host/ops/fused_moe/kernel/fused_moegemm_shape.hpp" #include "ck_tile/host/ops/fused_moe/kernel/fused_moegemm_tile_partitioner.hpp" #include "ck_tile/host/ops/fused_moe/kernel/moe_sorting_kernel.hpp" #include "ck_tile/host/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_ex.hpp" #include "ck_tile/host/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_policy.hpp" #include "ck_tile/host/ops/fused_moe/pipeline/fused_moegemm_pipeline_flatmm_uk.hpp" #include "ck_tile/host/ops/fused_moe/pipeline/fused_moegemm_pipeline_problem.hpp" #include "ck_tile/host/ops/fused_moe/pipeline/fused_moegemm_traits.hpp" #include "ck_tile/host/ops/fused_moe/pipeline/moe_sorting_pipeline.hpp" #include "ck_tile/host/ops/fused_moe/pipeline/moe_sorting_policy.hpp" #include "ck_tile/host/ops/fused_moe/pipeline/moe_sorting_problem.hpp" #include "ck_tile/host/ops/fused_moe.hpp" #include "ck_tile/host/ops/image_to_column/kernel/image_to_column_kernel.hpp" #include "ck_tile/host/ops/image_to_column/pipeline/block_image_to_column_problem.hpp" #include "ck_tile/host/ops/image_to_column/pipeline/tile_image_to_column_shape.hpp" #include "ck_tile/host/ops/image_to_column.hpp" #include "ck_tile/host/ops/norm_reduce/block/block_norm_reduce.hpp" #include "ck_tile/host/ops/norm_reduce/block/block_norm_reduce_problem.hpp" #include "ck_tile/host/ops/norm_reduce/thread/thread_welford.hpp" #include "ck_tile/host/ops/norm_reduce.hpp" #include "ck_tile/host/ops/permute/kernel/generic_permute_kernel.hpp" #include "ck_tile/host/ops/permute/pipeline/generic_petmute_problem.hpp" #include "ck_tile/host/ops/permute.hpp" #include "ck_tile/host/ops/rmsnorm2d/kernel/rmsnorm2d_fwd_kernel.hpp" #include "ck_tile/host/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_default_policy.hpp" #include "ck_tile/host/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_one_pass.hpp" #include "ck_tile/host/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_problem.hpp" #include "ck_tile/host/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_two_pass.hpp" #include "ck_tile/host/ops/rmsnorm2d.hpp" #include "ck_tile/host/ops/smoothquant/kernel/moe_smoothquant_kernel.hpp" #include "ck_tile/host/ops/smoothquant/kernel/smoothquant_kernel.hpp" #include "ck_tile/host/ops/smoothquant/pipeline/smoothquant_pipeline_default_policy.hpp" #include "ck_tile/host/ops/smoothquant/pipeline/smoothquant_pipeline_one_pass.hpp" #include "ck_tile/host/ops/smoothquant/pipeline/smoothquant_pipeline_problem.hpp" #include "ck_tile/host/ops/smoothquant/pipeline/smoothquant_pipeline_two_pass.hpp" #include "ck_tile/host/ops/smoothquant.hpp" #include "ck_tile/host/ops/softmax/block/block_softmax_2d.hpp" #include "ck_tile/host/ops/softmax/block/block_softmax_2d_problem.hpp" #include "ck_tile/host/ops/softmax.hpp" #include "ck_tile/host/ops/topk/block/block_topk_stream_2d.hpp" #include "ck_tile/host/ops/topk/block/block_topk_stream_2d_problem.hpp" #include "ck_tile/host/ops/topk.hpp" #include "ck_tile/host/ops/topk_softmax/kernel/topk_softmax_kernel.hpp" #include "ck_tile/host/ops/topk_softmax/pipeline/topk_softmax_warp_per_row_pipeline.hpp" #include "ck_tile/host/ops/topk_softmax/pipeline/topk_softmax_warp_per_row_policy.hpp" #include "ck_tile/host/ops/topk_softmax/pipeline/topk_softmax_warp_per_row_problem.hpp" #include "ck_tile/host/ops/topk_softmax.hpp" #include "ck_tile/host/reference/naive_attention.hpp" #include "ck_tile/host/reference/reference_batched_dropout.hpp" #include "ck_tile/host/reference/reference_batched_elementwise.hpp" #include "ck_tile/host/reference/reference_batched_gemm.hpp" #include "ck_tile/host/reference/reference_batched_masking.hpp" #include "ck_tile/host/reference/reference_batched_rotary_position_embedding.hpp" #include "ck_tile/host/reference/reference_batched_softmax.hpp" #include "ck_tile/host/reference/reference_elementwise.hpp" #include "ck_tile/host/reference/reference_fused_moe.hpp" #include "ck_tile/host/reference/reference_gemm.hpp" #include "ck_tile/host/reference/reference_im2col.hpp" #include "ck_tile/host/reference/reference_layernorm2d_fwd.hpp" #include "ck_tile/host/reference/reference_moe_sorting.hpp" #include "ck_tile/host/reference/reference_permute.hpp" #include "ck_tile/host/reference/reference_reduce.hpp" #include "ck_tile/host/reference/reference_rmsnorm2d_fwd.hpp" #include "ck_tile/host/reference/reference_rowwise_quantization2d.hpp" #include "ck_tile/host/reference/reference_softmax.hpp" #include "ck_tile/host/reference/reference_topk.hpp" #include "ck_tile/host/reference.hpp" #include "ck_tile/host/util/arg_parser.hpp" #include "ck_tile/host/util/check_err.hpp" #include "ck_tile/host/util/convolution_host_tensor_descriptor_helper.hpp" #include "ck_tile/host/util/convolution_parameter.hpp" #include "ck_tile/host/util/device_memory.hpp" #include "ck_tile/host/util/fill.hpp" #include "ck_tile/host/util/hip_check_error.hpp" #include "ck_tile/host/util/host_tensor.hpp" #include "ck_tile/host/util/joinable_thread.hpp" #include "ck_tile/host/util/kernel_launch.hpp" #include "ck_tile/host/util/ranges.hpp" #include "ck_tile/host/util/stream_config.hpp" #include "ck_tile/host/util/timer.hpp" #include "ck_tile/host/util.hpp"