#pragma once#include <torch/all.h>// TODO(wentao): refactor the folder to 8bit, then includes fp8 and int8 folders// 8-bit per-token-group quantization helper used by both FP8 and INT8voidper_token_group_quant_8bit(consttorch::Tensor&input,torch::Tensor&output_q,torch::Tensor&output_s,int64_tgroup_size,doubleeps,doublemin_8bit,doublemax_8bit,boolscale_ue8m0=false);