@@ -3004,6 +3052,7 @@ __global__ void gemm_device(int M, int N, int K,
...
@@ -3004,6 +3052,7 @@ __global__ void gemm_device(int M, int N, int K,
//template __global__ void kMatmul_inference_4bit<NF4, half, half, half>(half *A, unsigned char *B, half *out, int lda, int ldb, int rowsA, int colsA, int colsB);
//template __global__ void kMatmul_inference_4bit<NF4, half, half, half>(half *A, unsigned char *B, half *out, int lda, int ldb, int rowsA, int colsA, int colsB);