qdq_8.cuh 643 Bytes
Newer Older
ilyas@huggingface.co's avatar
ilyas@huggingface.co committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#ifndef _qdq_8_cuh
#define _qdq_8_cuh

#include "qdq_util.cuh"
#include "../../config.h"

#if QMODE_8BIT == 1

  // Not implemented: when QMODE_8BIT == 1 this header defines neither
  // shuffle_8bit_4 nor dequant_8bit_8.

#else

// Shuffle hook for 8-bit quantized words.
//
// The body is empty: neither `q` nor `stride` is read or written, so calling
// this function has no effect on the data.
//
//   q       pointer to packed 32-bit words (unused here)
//   stride  element stride supplied by the caller (unused here)
//
// NOTE(review): presumably no reordering is needed because 8-bit values are
// already byte-aligned inside each 32-bit word -- confirm against the other
// qdq_*.cuh headers and the callers.
__forceinline__ __device__ void shuffle_8bit_4
(
    uint32_t* q,
    int stride
)
{
    // No-op.
}

// Dequantizes two packed 32-bit words into eight half values, written to `dq`
// as four half2 pairs.
//
//   q_0, q_1  packed input words; q_0 fills dq[0..1], q_1 fills dq[2..3]
//   dq        output array of four half2 values
//   stride    not referenced in this function
//
// For each word, the fields at bit offsets 0, 8, 16 and 24 are passed through
// exb(word, offset, 0xff) and then dq_ns(value, 128). Consecutive results are
// paired, the lower offset going in the first half2 argument.
//
// NOTE(review): exb/dq_ns live in qdq_util.cuh; presumably exb extracts the
// masked bit field and 128 is the zero point removed by dq_ns -- confirm there.
__forceinline__ __device__ void dequant_8bit_8
(
    const uint32_t q_0,
    const uint32_t q_1,
    half2 (&dq)[4],
    int stride
)
{
    const uint32_t words[2] = { q_0, q_1 };

    for (int w = 0; w < 2; w++)
    {
        for (int p = 0; p < 2; p++)
        {
            // Each half2 consumes two adjacent 8-bit fields, i.e. 16 bits.
            const int shift = p * 16;

            const half first  = dq_ns(exb(words[w], shift,     0xff), 128);
            const half second = dq_ns(exb(words[w], shift + 8, 0xff), 128);

            dq[w * 2 + p] = __halves2half2(first, second);
        }
    }
}

#endif

#endif