// gemm_cuda.h
#include <torch/extension.h>

/// Forward entry point for the GEMM kernel. Only the declaration lives here;
/// the definition is in another translation unit not visible from this header.
///
/// NOTE(review): parameter meanings below are inferred from their names only --
/// confirm against the definition.
/// @param _in_feats         input tensor (presumably the activations).
/// @param _kernel           kernel tensor (presumably the packed quantized weights).
/// @param _scaling_factors  presumably the dequantization scales.
/// @param _zeros            presumably the quantization zero points.
/// @param split_k_iters     presumably the number of split-K partitions.
/// @return a torch::Tensor (presumably the GEMM result).
torch::Tensor gemm_forward_cuda(
    torch::Tensor _in_feats,
    torch::Tensor _kernel,
    torch::Tensor _scaling_factors,
    torch::Tensor _zeros,
    int split_k_iters);

6
7
8
9
10
11
12
13
14
15
16
17
torch::Tensor grouped_gemm_forward(
    torch::Tensor _in_feats,
    torch::Tensor _kernel,
    torch::Tensor _scaling_factors,
    torch::Tensor _zeros,
    torch::Tensor _topk_weights,
    torch::Tensor _sorted_token_ids_ptr,
    torch::Tensor _expert_ids_ptr,
    torch::Tensor _num_tokens_post_padded,
    bool mul_weights,
    int split_k_iters);

Casper's avatar
Casper committed
18
torch::Tensor gemmv2_forward_cuda(torch::Tensor _in_feats, torch::Tensor _kernel,
Casper's avatar
Casper committed
19
20
21
22
23
    torch::Tensor _scaling_factors, torch::Tensor _zeros, int group_size, int split_k_iters);

// Source - https://github.com/compressa-ai/AutoAWQ/blob/6673333456b8871522b11a7fb110de612edfdf95/awq_cuda/quantization/gemm_cuda.h#L9C1-L10C106
/// Entry point for weight dequantization. Only the declaration lives here; the
/// definition is in another translation unit not visible from this header.
///
/// NOTE(review): parameter meanings below are inferred from their names only --
/// confirm against the definition.
/// @param _kernel           kernel tensor (presumably the packed quantized weights).
/// @param _scaling_factors  presumably the dequantization scales.
/// @param _zeros            presumably the quantization zero points.
/// @param split_k_iters     presumably the number of split-K partitions.
/// @param thx               presumably a thread-block x dimension -- TODO confirm.
/// @param thy               presumably a thread-block y dimension -- TODO confirm.
/// @param dbg               presumably a debug toggle -- TODO confirm.
/// @return a torch::Tensor (presumably the dequantized weights).
torch::Tensor dequantize_weights_cuda(
    torch::Tensor _kernel,
    torch::Tensor _scaling_factors,
    torch::Tensor _zeros,
    int split_k_iters,
    int thx,
    int thy,
    bool dbg);