#pragma once // SPDX-License-Identifier: MIT #include void paged_attention_ragged( torch::Tensor& out, // [num_seqs, num_heads, head_size] torch::Tensor& workspace_buffer, torch::Tensor& query, // [num_seqs, num_heads, head_size] torch::Tensor& key_cache, // [num_blocks, num_heads, block_size, head_size] or // [num_blocks, block_size, num_heads, head_size] torch::Tensor& value_cache, // [num_blocks, num_heads, block_size, head_size] or // [num_blocks, block_size, num_heads, head_size] double scale, torch::Tensor& kv_indptr, // [num_seqs + 1] torch::Tensor& kv_page_indices, // [max_num_blocks] std::optional& kv_last_page_lens, // [num_seqs] int64_t block_size, int64_t max_num_partitions, const std::optional& alibi_slopes, const std::string& kv_cache_dtype, const std::string& kv_cache_layout, float logits_soft_cap, torch::Tensor& k_scale, torch::Tensor& v_scale, const std::optional& fp8_out_scale, int64_t partition_size);