// SPDX-License-Identifier: MIT #pragma once #include namespace aiter { void static_per_tensor_quant(torch::Tensor& out, // [..., d] torch::Tensor const& input, // [..., d] torch::Tensor const& scale); // [1] void dynamic_per_tensor_quant(torch::Tensor& out, // [..., d] torch::Tensor const& input, // [..., d] torch::Tensor& scale); // [1] void dynamic_per_token_scaled_quant(torch::Tensor& out, // [..., d] torch::Tensor const& input, // [..., d] torch::Tensor& scales, std::optional const& scale_ub, bool shuffle_scale = false, std::optional const& num_rows = std::nullopt, int num_rows_factor = 1); void dynamic_per_group_scaled_quant_fp4(torch::Tensor& out, // [..., d] torch::Tensor const& input, // [..., d] torch::Tensor& scales, int group_size = 32, bool shuffle_scale = true, std::optional const& num_rows = std::nullopt, int num_rows_factor = 1); void smooth_per_token_scaled_quant( torch::Tensor& out, // [..., d] torch::Tensor const& input, // [..., d] torch::Tensor& scales, torch::Tensor const& smooth_scale, std::optional const& smooth_scale_map = std::nullopt, bool shuffle_scale = false, std::optional const& num_rows = std::nullopt, int num_rows_factor = 1); void partial_transpose(torch::Tensor& out, // [rows, d] torch::Tensor const& input, // [rows, d] torch::Tensor const& num_rows); void moe_swiglu_dynamic_quant(torch::Tensor& scatter_tokens, torch::Tensor& smooth, torch::Tensor& experts_tokens_count, torch::Tensor& experts_tokens_start, torch::Tensor& output, torch::Tensor& scales, float beta); } // namespace aiter