gemm_w8a8.h 247 Bytes
Newer Older
Zhekai Zhang's avatar
Zhekai Zhang committed
1
2
3
4
5
6
7
8
9
10
11
#pragma once

#include "common.h"
#include "Tensor.h"

/**
 * Declaration of the W8A8 GEMM entry point.
 *
 * Only the prototype lives in this header; the definition is elsewhere, so
 * the notes below record what the signature shows and flag the rest for
 * confirmation.
 *
 * NOTE(review): going by the name, this presumably multiplies 8-bit
 * activations by 8-bit weights and yields an fp16 result -- confirm against
 * the definition.
 *
 * @param input   First operand; annotated as INT8 at the declaration.
 * @param weight  Second operand; annotated as INT8 at the declaration.
 * @param out     Tensor passed alongside the operands. NOTE(review):
 *                presumably the destination buffer -- confirm whether it is
 *                written in place and whether it may be left empty.
 * @param scale   Half-precision scalar. NOTE(review): presumably a single
 *                scale applied to the accumulated product -- confirm.
 * @param bias    Half-precision scalar. NOTE(review): presumably a single
 *                offset added after scaling -- confirm.
 * @return        A Tensor. NOTE(review): presumably `out` (or a freshly
 *                allocated result) -- confirm against the definition.
 */
Tensor gemm_w8a8_fp16(
    Tensor input,   // INT8
    Tensor weight,  // INT8
    Tensor out,
    half scale,
    half bias);