#pragma once #include torch::Tensor gemv_forward_cuda_decode( torch::Tensor _in_feats, torch::Tensor _kernel, torch::Tensor _scaling_factors, torch::Tensor _zeros, int m, int n, int k, int group_size);