Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fc990f97
Unverified
Commit
fc990f97
authored
Sep 16, 2024
by
Isotr0py
Committed by
GitHub
Sep 15, 2024
Browse files
[Bugfix][Kernel] Add `IQ1_M` quantization implementation to GGUF kernel (#8357)
parent
3724d5f6
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
548 additions
and
162 deletions
+548
-162
csrc/quantization/gguf/dequantize.cuh
csrc/quantization/gguf/dequantize.cuh
+46
-9
csrc/quantization/gguf/ggml-common.h
csrc/quantization/gguf/ggml-common.h
+277
-131
csrc/quantization/gguf/gguf_kernel.cu
csrc/quantization/gguf/gguf_kernel.cu
+5
-0
csrc/quantization/gguf/mmvq.cuh
csrc/quantization/gguf/mmvq.cuh
+8
-0
csrc/quantization/gguf/vecdotq.cuh
csrc/quantization/gguf/vecdotq.cuh
+81
-20
requirements-common.txt
requirements-common.txt
+1
-1
tests/kernels/test_gguf.py
tests/kernels/test_gguf.py
+126
-0
vllm/model_executor/layers/quantization/gguf.py
vllm/model_executor/layers/quantization/gguf.py
+4
-1
No files found.
csrc/quantization/gguf/dequantize.cuh
View file @
fc990f97
...
@@ -353,18 +353,47 @@ static __global__ void dequantize_block_iq3_s(const void * __restrict__ vx, dst_
...
@@ -353,18 +353,47 @@ static __global__ void dequantize_block_iq3_s(const void * __restrict__ vx, dst_
template
<
typename
dst_t
>
template
<
typename
dst_t
>
static
__global__
void
dequantize_block_iq1_s
(
const
void
*
__restrict__
vx
,
dst_t
*
__restrict__
yy
)
{
static
__global__
void
dequantize_block_iq1_s
(
const
void
*
__restrict__
vx
,
dst_t
*
__restrict__
yy
)
{
const
int
i
=
blockIdx
.
x
;
const
int
64_t
i
=
blockIdx
.
x
;
const
block_iq1_s
*
x
=
(
const
block_iq1_s
*
)
vx
;
const
block_iq1_s
*
x
=
(
const
block_iq1_s
*
)
vx
;
const
int
tid
=
threadIdx
.
x
;
const
int64_t
tid
=
threadIdx
.
x
;
const
int
il
=
tid
/
8
;
// 0...3
const
int64_t
il
=
tid
/
8
;
// 0...3
const
int
ib
=
tid
%
8
;
// 0...7
const
int64_t
ib
=
tid
%
8
;
// 0...7
dst_t
*
y
=
yy
+
i
*
QK_K
+
32
*
ib
+
8
*
il
;
const
float
delta
=
x
[
i
].
qh
[
ib
]
&
0x8000
?
-
1
-
IQ1S_DELTA
:
-
1
+
IQ1S_DELTA
;
const
float
d
=
__half2float
(
x
[
i
].
d
)
*
(
2
*
((
x
[
i
].
qh
[
ib
]
>>
12
)
&
7
)
+
1
);
uint32_t
grid32
[
2
];
const
int8_t
*
q
=
(
const
int8_t
*
)
grid32
;
grid32
[
0
]
=
iq1s_grid_gpu
[
x
[
i
].
qs
[
4
*
ib
+
il
]
|
(((
x
[
i
].
qh
[
ib
]
>>
3
*
il
)
&
7
)
<<
8
)];
grid32
[
1
]
=
(
grid32
[
0
]
>>
4
)
&
0x0f0f0f0f
;
grid32
[
0
]
&=
0x0f0f0f0f
;
for
(
int
j
=
0
;
j
<
8
;
++
j
)
{
y
[
j
]
=
__float2half
(
d
*
(
q
[
j
]
+
delta
));
}
}
template
<
typename
dst_t
>
static
__global__
void
dequantize_block_iq1_m
(
const
void
*
__restrict__
vx
,
dst_t
*
__restrict__
yy
)
{
const
int64_t
i
=
blockIdx
.
x
;
const
block_iq1_m
*
x
=
(
const
block_iq1_m
*
)
vx
;
const
int64_t
tid
=
threadIdx
.
x
;
const
int64_t
il
=
tid
/
8
;
// 0...3
const
int64_t
ib
=
tid
%
8
;
// 0...7
dst_t
*
y
=
yy
+
i
*
QK_K
+
32
*
ib
+
8
*
il
;
dst_t
*
y
=
yy
+
i
*
QK_K
+
32
*
ib
+
8
*
il
;
const
int
i8
=
4
*
ib
+
il
;
const
uint16_t
*
sc
=
(
const
uint16_t
*
)
x
[
i
].
scales
;
uint8_t
h
=
x
[
i
].
scales
[
i8
/
2
]
>>
4
*
(
i8
%
2
);
iq1m_scale_t
scale
;
const
int8_t
*
grid
=
(
const
int8_t
*
)(
iq1s_grid
+
(
x
[
i
].
qs
[
i8
]
|
((
h
&
8
)
<<
5
)));
scale
.
u16
=
(
sc
[
0
]
>>
12
)
|
((
sc
[
1
]
>>
8
)
&
0x00f0
)
|
((
sc
[
2
]
>>
4
)
&
0x0f00
)
|
(
sc
[
3
]
&
0xf000
);
const
float
d
=
__half2float
(
x
[
i
].
d
)
*
(
2
*
(
h
&
7
)
+
1
);
const
int64_t
ib16
=
2
*
ib
+
il
/
2
;
// sc[ib16/4] >> 3*(ib16%4) -> sc[ib/2] >> 3*((2*ib+il/2)%4);
for
(
int
j
=
0
;
j
<
8
;
++
j
)
y
[
j
]
=
__float2half
(
d
*
grid
[
j
]);
const
float
d
=
__half2float
(
scale
.
f16
)
*
(
2
*
((
sc
[
ib16
/
4
]
>>
3
*
(
ib16
%
4
))
&
0x7
)
+
1
);
const
float
delta
=
x
[
i
].
qh
[
2
*
ib
+
il
/
2
]
&
(
0x08
<<
4
*
(
il
%
2
))
?
-
1
-
IQ1M_DELTA
:
-
1
+
IQ1M_DELTA
;
uint32_t
grid32
[
2
];
const
int8_t
*
q
=
(
const
int8_t
*
)
grid32
;
grid32
[
0
]
=
iq1s_grid_gpu
[
x
[
i
].
qs
[
4
*
ib
+
il
]
|
(((
x
[
i
].
qh
[
2
*
ib
+
il
/
2
]
>>
4
*
(
il
%
2
))
&
7
)
<<
8
)];
grid32
[
1
]
=
(
grid32
[
0
]
>>
4
)
&
0x0f0f0f0f
;
grid32
[
0
]
&=
0x0f0f0f0f
;
for
(
int
j
=
0
;
j
<
8
;
++
j
)
{
y
[
j
]
=
__float2half
(
d
*
(
q
[
j
]
+
delta
));
}
}
}
template
<
typename
dst_t
>
template
<
typename
dst_t
>
...
@@ -475,6 +504,12 @@ static void dequantize_row_iq1_s_cuda(const void * vx, dst_t * y, const int k, c
...
@@ -475,6 +504,12 @@ static void dequantize_row_iq1_s_cuda(const void * vx, dst_t * y, const int k, c
dequantize_block_iq1_s
<<<
nb
,
32
,
0
,
stream
>>>
(
vx
,
y
);
dequantize_block_iq1_s
<<<
nb
,
32
,
0
,
stream
>>>
(
vx
,
y
);
}
}
template
<
typename
dst_t
>
static
void
dequantize_row_iq1_m_cuda
(
const
void
*
vx
,
dst_t
*
y
,
const
int
k
,
cudaStream_t
stream
)
{
const
int
nb
=
k
/
QK_K
;
dequantize_block_iq1_m
<<<
nb
,
32
,
0
,
stream
>>>
(
vx
,
y
);
}
template
<
typename
dst_t
>
template
<
typename
dst_t
>
static
void
dequantize_row_iq4_nl_cuda
(
const
void
*
vx
,
dst_t
*
y
,
const
int
k
,
cudaStream_t
stream
)
{
static
void
dequantize_row_iq4_nl_cuda
(
const
void
*
vx
,
dst_t
*
y
,
const
int
k
,
cudaStream_t
stream
)
{
const
int
nb
=
(
k
+
QK_K
-
1
)
/
QK_K
;
const
int
nb
=
(
k
+
QK_K
-
1
)
/
QK_K
;
...
@@ -525,6 +560,8 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(int64_t type) {
...
@@ -525,6 +560,8 @@ static to_fp16_cuda_t ggml_get_to_fp16_cuda(int64_t type) {
return
dequantize_row_iq2_s_cuda
;
return
dequantize_row_iq2_s_cuda
;
case
23
:
case
23
:
return
dequantize_row_iq4_xs_cuda
;
return
dequantize_row_iq4_xs_cuda
;
case
29
:
return
dequantize_row_iq1_m_cuda
;
default:
default:
return
nullptr
;
return
nullptr
;
}
}
...
...
csrc/quantization/gguf/ggml-common.h
View file @
fc990f97
...
@@ -149,14 +149,30 @@ typedef struct {
...
@@ -149,14 +149,30 @@ typedef struct {
uint8_t
scales
[
IQ3S_N_SCALE
];
uint8_t
scales
[
IQ3S_N_SCALE
];
}
block_iq3_s
;
}
block_iq3_s
;
// 1.5625 bpw
#define QR1_S 8
#define QR1_S 8
#define QI1_S (QK_K / (4*QR1_S))
#define QI1_S (QK_K / (4*QR1_S))
typedef
struct
{
typedef
struct
{
half
d
;
half
d
;
uint8_t
qs
[
QK_K
/
8
];
uint8_t
qs
[
QK_K
/
8
];
uint
8
_t
scales
[
QK_K
/
16
];
uint
16
_t
qh
[
QK_K
/
32
];
}
block_iq1_s
;
}
block_iq1_s
;
// 1.75 bpw
#define QR1_M 8
#define QI1_M (QK_K / (4*QR1_M))
typedef
struct
{
uint8_t
qs
[
QK_K
/
8
];
// grid index, low 8 bits
uint8_t
qh
[
QK_K
/
16
];
// grid index, high 3 bits + grid shift bit (for two groups of 8)
uint8_t
scales
[
QK_K
/
32
];
// 3-bit block scales (4-bit if QK_K == 64)
}
block_iq1_m
;
// Used by IQ1_M quants
typedef
union
{
half
f16
;
uint16_t
u16
;
}
iq1m_scale_t
;
#define QK4_NL 32
#define QK4_NL 32
#define QR4_NL 2
#define QR4_NL 2
#define QI4_NL (QK4_NL / (4*QR4_NL))
#define QI4_NL (QK4_NL / (4*QR4_NL))
...
@@ -733,135 +749,265 @@ static const __device__ uint32_t iq3xs_grid[512] = {
...
@@ -733,135 +749,265 @@ static const __device__ uint32_t iq3xs_grid[512] = {
0x3e240c1c
,
0x3e241404
,
0x3e242c04
,
0x3e2c1414
,
0x3e2c2414
,
0x3e340414
,
0x3e341c0c
,
0x3e3e0404
,
0x3e240c1c
,
0x3e241404
,
0x3e242c04
,
0x3e2c1414
,
0x3e2c2414
,
0x3e340414
,
0x3e341c0c
,
0x3e3e0404
,
};
};
static
const
__device__
uint64_t
iq1s_grid
[
512
]
=
{
#define IQ1S_DELTA 0.125f
0xffffffffffff0101
,
0xffffffffff01ff00
,
0xffffffffff010100
,
0xffffffff00000000
,
#define IQ1M_DELTA 0.125f
0xffffffff01ff00ff
,
0xffffffff01ff0001
,
0xffffffff0101ffff
,
0xffffffff0101ff01
,
static
const
__device__
uint64_t
iq1s_grid_gpu
[
2048
]
=
{
0xffffff00ff000000
,
0xffffff000000ff00
,
0xffffff00000000ff
,
0xffffff0000000100
,
0x00000000
,
0x00000002
,
0x00000101
,
0x00000200
,
0x00000202
,
0x00010001
,
0x00010101
,
0x00020000
,
0xffffff0000010000
,
0xffffff0001000000
,
0xffffff01ffff00ff
,
0xffffff01ff01ff00
,
0x00020002
,
0x00020200
,
0x00020202
,
0x01000101
,
0x01010001
,
0x01010100
,
0x01010102
,
0x01020101
,
0xffffff01ff010100
,
0xffffff0100000001
,
0xffffff0101ffff00
,
0xffffff0101ff0101
,
0x02000000
,
0x02000002
,
0x02000200
,
0x02000202
,
0x02010101
,
0x02020000
,
0x02020002
,
0x02020200
,
0xffffff0101010100
,
0xffff00ffff00ff01
,
0xffff00ffff0000ff
,
0xffff00ff00ff0100
,
0x02020202
,
0x00000110
,
0x00000111
,
0x00010011
,
0x00010110
,
0x00010112
,
0x00010211
,
0x00010212
,
0xffff00ff0100ff00
,
0xffff00ff010001ff
,
0xffff0000ff0101ff
,
0xffff000000ffff00
,
0x00020111
,
0x01000011
,
0x01000112
,
0x01000211
,
0x01010012
,
0x01010111
,
0x01010212
,
0x01020011
,
0xffff000000000000
,
0xffff00000001ff01
,
0xffff000001000101
,
0xffff0000010100ff
,
0x01020110
,
0x01020112
,
0x01020210
,
0x02000111
,
0x02010011
,
0x02010110
,
0x02010112
,
0x02020111
,
0xffff0001ffff0100
,
0xffff00010000ff00
,
0xffff000100010101
,
0xffff000101000000
,
0x00000020
,
0x00000022
,
0x00000220
,
0x00000222
,
0x00010121
,
0x00020020
,
0x00020022
,
0x00020220
,
0xffff01ffffff0000
,
0xffff01ffff01ffff
,
0xffff01ffff010100
,
0xffff01ff00000000
,
0x00020222
,
0x01000121
,
0x01010021
,
0x01010221
,
0x01020120
,
0x01020221
,
0x02000020
,
0x02000022
,
0xffff01ff01ffffff
,
0xffff01ff01ff0001
,
0xffff01ff0101ffff
,
0xffff01ff01010001
,
0x02000220
,
0x02000222
,
0x02010021
,
0x02010121
,
0x02010221
,
0x02020020
,
0x02020022
,
0x02020220
,
0xffff0100ffffff01
,
0xffff01000000ffff
,
0xffff010000000100
,
0xffff010001ff01ff
,
0x02020222
,
0x00011001
,
0x00011100
,
0x00011102
,
0x00021101
,
0x01001001
,
0x01001201
,
0x01011101
,
0xffff010001000000
,
0xffff0101ff000000
,
0xffff0101000101ff
,
0xffff010101ffff01
,
0x01011202
,
0x01021100
,
0x01021101
,
0x02011001
,
0x02011201
,
0x02021101
,
0x00001011
,
0x00001110
,
0xffff01010101ff00
,
0xff00ffffff000000
,
0xff00ffff00ffff00
,
0xff00ffff00000001
,
0x00001111
,
0x00001112
,
0x00011111
,
0x00011210
,
0x00011212
,
0x00021211
,
0x01001010
,
0x01001111
,
0xff00ffff000001ff
,
0xff00ffff01010000
,
0xff00ff00ffff0000
,
0xff00ff00ff00ff00
,
0x01001212
,
0x01011010
,
0x01011011
,
0x01011110
,
0x01011111
,
0x01011112
,
0x01011211
,
0x01021010
,
0xff00ff00ff0000ff
,
0xff00ff00ff000100
,
0xff00ff00ff010001
,
0xff00ff0000ff0001
,
0x01021012
,
0x01021111
,
0x01021210
,
0x01021212
,
0x02001011
,
0x02011011
,
0x02011111
,
0x02011210
,
0xff00ff000000ffff
,
0xff00ff0000000000
,
0xff00ff000001ff00
,
0xff00ff0000010100
,
0x02011212
,
0x02021011
,
0x02021110
,
0x02021111
,
0x02021112
,
0x02021211
,
0x00011120
,
0x00011221
,
0xff00ff0001ff0000
,
0xff00ff000100ff00
,
0xff00ff0001000100
,
0xff00ff01ff000000
,
0x01001021
,
0x01001120
,
0x01011020
,
0x01011022
,
0x01011121
,
0x01011220
,
0x01021020
,
0x01021021
,
0xff00ff0100ff0000
,
0xff00ff01000001ff
,
0xff00ff0101010001
,
0xff0000ff00000000
,
0x01021122
,
0x01021221
,
0x02001121
,
0x02011021
,
0x02011120
,
0x02011221
,
0x00002000
,
0x00002002
,
0xff0000ff0001ff00
,
0xff0000ff00010100
,
0xff000000ffff0101
,
0xff000000ff000000
,
0x00002200
,
0x00002202
,
0x00012101
,
0x00022000
,
0x00022002
,
0x00022200
,
0x00022202
,
0x01002101
,
0xff000000ff01ff00
,
0xff00000000ff0000
,
0xff0000000000ff00
,
0xff000000000000ff
,
0x01012001
,
0x01012102
,
0x01022101
,
0x02002000
,
0x02002002
,
0x02002200
,
0x02002202
,
0x02012101
,
0xff00000000000000
,
0xff00000000000001
,
0xff00000000000100
,
0xff0000000001ffff
,
0x02022000
,
0x02022002
,
0x02022200
,
0x02022202
,
0x00002111
,
0x00012011
,
0x00012110
,
0x00012211
,
0xff00000000010000
,
0xff00000001000000
,
0xff00000001010100
,
0xff000001ff00ff01
,
0x00022110
,
0x00022111
,
0x01002011
,
0x01012010
,
0x01012011
,
0x01012111
,
0x01022011
,
0x01022110
,
0xff000001ff0100ff
,
0xff00000100000000
,
0xff0000010001ff00
,
0xff00000101ff0100
,
0x01022211
,
0x02012011
,
0x02012110
,
0x02012112
,
0x02012211
,
0x02022111
,
0x00002020
,
0x00002022
,
0xff0000010100ff00
,
0xff0001ff00ff00ff
,
0xff0001ff00000101
,
0xff0001ff000100ff
,
0x00002220
,
0x00002222
,
0x00012121
,
0x00022020
,
0x00022022
,
0x00022220
,
0x00022222
,
0x01002121
,
0xff0001ff01000000
,
0xff000100ff0001ff
,
0xff0001000000ff01
,
0xff00010000000000
,
0x01012021
,
0x01012221
,
0x01022021
,
0x01022121
,
0x02002020
,
0x02002022
,
0x02002121
,
0x02002220
,
0xff00010000010001
,
0xff00010000010100
,
0xff00010001ffff00
,
0xff00010001ff0101
,
0x02002222
,
0x02012121
,
0x02022020
,
0x02022022
,
0x02022220
,
0x02022222
,
0x00110000
,
0x00110001
,
0xff00010001010000
,
0xff000101ffffffff
,
0xff000101ff000101
,
0xff00010101ff00ff
,
0x00110100
,
0x00110201
,
0x00120100
,
0x00120101
,
0x01100001
,
0x01100100
,
0x01110000
,
0x01110101
,
0xff00010101000001
,
0xff000101010100ff
,
0xff01ffffff000101
,
0xff01ffffff01ffff
,
0x01110200
,
0x01120001
,
0x01120100
,
0x01120101
,
0x01120201
,
0x02110001
,
0x02110100
,
0x02110102
,
0xff01ffffff01ff01
,
0xff01ffffff0101ff
,
0xff01ffff00000000
,
0xff01ffff01ff0001
,
0x02120001
,
0x02120101
,
0x00100011
,
0x00100110
,
0x00100112
,
0x00100211
,
0x00110010
,
0x00110012
,
0xff01ffff0101ff01
,
0xff01ff00ff000000
,
0xff01ff0000ff0100
,
0xff01ff000000ff01
,
0x00110111
,
0x00110210
,
0x00120011
,
0x00120110
,
0x00120211
,
0x01100111
,
0x01100212
,
0x01110010
,
0xff01ff0000010000
,
0xff01ff00010000ff
,
0xff01ff01ff01ff00
,
0xff01ff0100000101
,
0x01110011
,
0x01110012
,
0x01110110
,
0x01110111
,
0x01110112
,
0x01110211
,
0x01120010
,
0x01120111
,
0xff0100ffffff0000
,
0xff0100ffff010000
,
0xff0100ff01ff00ff
,
0xff0100ff01000100
,
0x02100110
,
0x02110012
,
0x02110111
,
0x02120011
,
0x02120110
,
0x00110021
,
0x00110120
,
0x00110122
,
0xff0100ff010100ff
,
0xff010000ffffff01
,
0xff01000000000000
,
0xff0100000101ff00
,
0x00120121
,
0x01100020
,
0x01100122
,
0x01100221
,
0x01110022
,
0x01110121
,
0x01110220
,
0x01110222
,
0xff010001ffff00ff
,
0xff010001ff000100
,
0xff01000100ffff00
,
0xff01000100010001
,
0x01120120
,
0x01120122
,
0x02100121
,
0x02110021
,
0x02110120
,
0x02110122
,
0x02120121
,
0x00101001
,
0xff01000101ff0001
,
0xff010001010001ff
,
0xff0101ffffffffff
,
0xff0101ffff01ffff
,
0x00101102
,
0x00101201
,
0x00111100
,
0x00111101
,
0x00111200
,
0x00111201
,
0x00121001
,
0x00121102
,
0xff0101ffff010101
,
0xff0101ff0000ff00
,
0xff0101ff01010001
,
0xff010100ff000000
,
0x01101001
,
0x01101101
,
0x01101102
,
0x01101200
,
0x01101202
,
0x01111001
,
0x01111100
,
0x01111101
,
0xff010100ff01ff01
,
0xff01010000ff0001
,
0xff01010000000100
,
0xff01010001000000
,
0x01111102
,
0x01111201
,
0x01121002
,
0x01121101
,
0x01121200
,
0x02101100
,
0x02101201
,
0x02111000
,
0xff0101010100ffff
,
0x00ffffff0000ff01
,
0x00ffffff000000ff
,
0x00ffffff00000100
,
0x02111100
,
0x02111101
,
0x02111200
,
0x02111201
,
0x02111202
,
0x02121001
,
0x02121100
,
0x02121101
,
0x00ffffff00010000
,
0x00ffff00ffff0001
,
0x00ffff00ff0000ff
,
0x00ffff00ff000100
,
0x02121201
,
0x00101012
,
0x00101111
,
0x00101212
,
0x00111011
,
0x00111110
,
0x00111111
,
0x00111112
,
0x00ffff0000000000
,
0x00ffff0001000100
,
0x00ffff0001010001
,
0x00ffff01ff00ff01
,
0x00111211
,
0x00121010
,
0x00121012
,
0x00121111
,
0x00121210
,
0x00121212
,
0x01101011
,
0x01101110
,
0x00ffff0100ff0100
,
0x00ffff010000ff00
,
0x00ffff01000100ff
,
0x00ffff0101ff00ff
,
0x01101111
,
0x01101112
,
0x01111011
,
0x01111012
,
0x01111110
,
0x01111111
,
0x01111112
,
0x01111211
,
0x00ffff010101ff00
,
0x00ff00ffffffffff
,
0x00ff00ffffff01ff
,
0x00ff00ffff000101
,
0x01111212
,
0x01121011
,
0x01121110
,
0x01121111
,
0x01121112
,
0x01121211
,
0x02101010
,
0x02101012
,
0x00ff00ff00000000
,
0x00ff00ff000101ff
,
0x00ff00ff01010101
,
0x00ff0000ff000000
,
0x02101110
,
0x02101111
,
0x02101210
,
0x02101212
,
0x02111010
,
0x02111011
,
0x02111110
,
0x02111111
,
0x00ff0000ff01ffff
,
0x00ff000000ff0000
,
0x00ff00000000ff00
,
0x00ff0000000000ff
,
0x02111112
,
0x02111211
,
0x02111212
,
0x02121010
,
0x02121012
,
0x02121111
,
0x00101021
,
0x00101120
,
0x00ff000000000000
,
0x00ff000000000001
,
0x00ff000000000100
,
0x00ff000000010000
,
0x00101121
,
0x00101122
,
0x00111121
,
0x00111122
,
0x00111220
,
0x00111222
,
0x00121021
,
0x00121122
,
0x00ff000001ffff01
,
0x00ff000001000000
,
0x00ff0001ff000101
,
0x00ff000100ffffff
,
0x01101020
,
0x01101022
,
0x01101120
,
0x01101121
,
0x01101220
,
0x01101222
,
0x01111021
,
0x01111121
,
0x00ff000100000000
,
0x00ff0001010001ff
,
0x00ff01ffff000000
,
0x00ff01ff0001ff00
,
0x01111122
,
0x01111220
,
0x01111221
,
0x01121021
,
0x01121120
,
0x01121121
,
0x01121220
,
0x01121221
,
0x00ff01ff01ff0100
,
0x00ff0100ff01ff01
,
0x00ff010000ff00ff
,
0x00ff010000ff0101
,
0x01121222
,
0x02101122
,
0x02101222
,
0x02111022
,
0x02111121
,
0x02121120
,
0x02121221
,
0x00112001
,
0x00ff010000000000
,
0x00ff010000010101
,
0x00ff01000100ff00
,
0x00ff010001010000
,
0x00112102
,
0x00122101
,
0x01102001
,
0x01102100
,
0x01102102
,
0x01102201
,
0x01112000
,
0x01112101
,
0x00ff0101ffffff00
,
0x00ff01010000ff01
,
0x00ff010100000100
,
0x00ff010101ff0000
,
0x01112200
,
0x01112202
,
0x01122000
,
0x01122001
,
0x01122100
,
0x01122102
,
0x01122201
,
0x02102101
,
0x0000ffffffff0100
,
0x0000ffffff00ff00
,
0x0000ffffff0000ff
,
0x0000ffffff010000
,
0x02112001
,
0x02112100
,
0x02122101
,
0x00112010
,
0x00112012
,
0x00112111
,
0x00112212
,
0x00122011
,
0x0000ffff00000000
,
0x0000ffff00010101
,
0x0000ffff01ffff01
,
0x0000ffff01000100
,
0x00122111
,
0x01102012
,
0x01102110
,
0x01102111
,
0x01102210
,
0x01112011
,
0x01112110
,
0x01112111
,
0x0000ff00ff000000
,
0x0000ff00ff01ff00
,
0x0000ff00ff0101ff
,
0x0000ff0000ff0000
,
0x01112112
,
0x01112211
,
0x01112212
,
0x01122010
,
0x01122111
,
0x01122212
,
0x02102211
,
0x02112011
,
0x0000ff000000ff00
,
0x0000ff00000000ff
,
0x0000ff0000000000
,
0x0000ff0000000001
,
0x02112012
,
0x02112111
,
0x02112210
,
0x02122011
,
0x02122112
,
0x02122211
,
0x00102221
,
0x00112122
,
0x0000ff0000000100
,
0x0000ff0000010000
,
0x0000ff0001ffffff
,
0x0000ff0001ff01ff
,
0x00122120
,
0x00122122
,
0x01102120
,
0x01102122
,
0x01102221
,
0x01112020
,
0x01112022
,
0x01112121
,
0x0000ff0001000000
,
0x0000ff000101ffff
,
0x0000ff01ffff0101
,
0x0000ff01ff010000
,
0x01112220
,
0x01122021
,
0x01122122
,
0x01122221
,
0x02102121
,
0x02112021
,
0x02112122
,
0x02112222
,
0x0000ff0100000000
,
0x0000ff0101000101
,
0x000000ffffff0001
,
0x000000ffff000000
,
0x00200000
,
0x00200002
,
0x00200200
,
0x00200202
,
0x00210101
,
0x00220000
,
0x00220002
,
0x00220101
,
0x000000ff00ff0000
,
0x000000ff0000ff00
,
0x000000ff000000ff
,
0x000000ff00000000
,
0x00220200
,
0x00220202
,
0x01200101
,
0x01210001
,
0x01210201
,
0x01220001
,
0x01220101
,
0x02200000
,
0x000000ff00000001
,
0x000000ff00000100
,
0x000000ff00010000
,
0x000000ff01000000
,
0x02200002
,
0x02200200
,
0x02200202
,
0x02210101
,
0x02220000
,
0x02220002
,
0x02220101
,
0x02220200
,
0x000000ff0101ff00
,
0x00000000ffff0000
,
0x00000000ff00ff00
,
0x00000000ff0000ff
,
0x02220202
,
0x00200111
,
0x00210011
,
0x00210110
,
0x00210211
,
0x00220111
,
0x01200012
,
0x01200110
,
0x00000000ff000000
,
0x00000000ff000001
,
0x00000000ff000100
,
0x00000000ff010000
,
0x01200211
,
0x01210111
,
0x01210210
,
0x01210212
,
0x01220011
,
0x01220110
,
0x01220111
,
0x01220112
,
0x0000000000ffff00
,
0x0000000000ff00ff
,
0x0000000000ff0000
,
0x0000000000ff0001
,
0x02200111
,
0x02210010
,
0x02210112
,
0x02210211
,
0x02220111
,
0x00200021
,
0x00200220
,
0x00200222
,
0x0000000000ff0100
,
0x000000000000ffff
,
0x000000000000ff00
,
0x000000000000ff01
,
0x00210021
,
0x00210121
,
0x00220020
,
0x00220022
,
0x00220220
,
0x00220222
,
0x01200121
,
0x01210021
,
0x00000000000000ff
,
0x0000000000000001
,
0x00000000000001ff
,
0x0000000000000100
,
0x01210122
,
0x01210221
,
0x01220121
,
0x02200021
,
0x02200220
,
0x02200222
,
0x02210021
,
0x02210121
,
0x0000000000000101
,
0x000000000001ff00
,
0x00000000000100ff
,
0x0000000000010000
,
0x02220020
,
0x02220022
,
0x02220220
,
0x02220222
,
0x00201101
,
0x00211100
,
0x00211102
,
0x00211201
,
0x0000000000010001
,
0x0000000000010100
,
0x0000000001ff0000
,
0x000000000100ff00
,
0x00221101
,
0x01201100
,
0x01201101
,
0x01201102
,
0x01201201
,
0x01211002
,
0x01211101
,
0x01211200
,
0x00000000010000ff
,
0x0000000001000000
,
0x0000000001000001
,
0x0000000001000100
,
0x01211202
,
0x01221102
,
0x02201101
,
0x02211001
,
0x02211100
,
0x02211201
,
0x02221001
,
0x02221101
,
0x0000000001010000
,
0x00000001ffff01ff
,
0x00000001ff000000
,
0x0000000100ff0000
,
0x00201211
,
0x00211111
,
0x00221011
,
0x00221211
,
0x01201010
,
0x01201111
,
0x01201210
,
0x01211011
,
0x000000010000ff00
,
0x00000001000000ff
,
0x0000000100000000
,
0x0000000100000001
,
0x01211110
,
0x01211111
,
0x01211211
,
0x01221012
,
0x01221111
,
0x01221210
,
0x02201211
,
0x02211010
,
0x0000000100000100
,
0x0000000100010000
,
0x0000000101000000
,
0x000001ffff00ff00
,
0x02211110
,
0x02211111
,
0x02211210
,
0x02211212
,
0x02221011
,
0x02221110
,
0x02221112
,
0x02221211
,
0x000001ffff010001
,
0x000001ffff0101ff
,
0x000001ff00ffff01
,
0x000001ff0000ffff
,
0x00201121
,
0x00211020
,
0x00211022
,
0x00211221
,
0x00221121
,
0x01201021
,
0x01201221
,
0x01211121
,
0x000001ff00000000
,
0x000001ff010000ff
,
0x000001ff01010100
,
0x00000100ffff0100
,
0x01221020
,
0x01221021
,
0x01221221
,
0x02201120
,
0x02201122
,
0x02211020
,
0x02211222
,
0x00202000
,
0x00000100ff000000
,
0x0000010000ff0000
,
0x000001000000ff00
,
0x00000100000000ff
,
0x00202002
,
0x00202200
,
0x00202202
,
0x00212101
,
0x00222000
,
0x00222002
,
0x00222200
,
0x00222202
,
0x0000010000000000
,
0x0000010000000001
,
0x0000010000000100
,
0x0000010000010000
,
0x01202101
,
0x01212001
,
0x01212100
,
0x01222101
,
0x02202000
,
0x02202002
,
0x02202200
,
0x02202202
,
0x0000010001000000
,
0x000001000101ff01
,
0x00000101ffff0001
,
0x00000101ff01ffff
,
0x02222000
,
0x02222002
,
0x02222200
,
0x02222202
,
0x00202211
,
0x00212011
,
0x00212110
,
0x00212211
,
0x0000010100000000
,
0x0000010101010100
,
0x0001ffffff000000
,
0x0001ffff00ffffff
,
0x00222111
,
0x01202112
,
0x01202211
,
0x01212012
,
0x01212111
,
0x01222011
,
0x01222110
,
0x01222112
,
0x0001ffff00000100
,
0x0001ffff0001ff00
,
0x0001ffff01000000
,
0x0001ff00ffffff00
,
0x01222211
,
0x02202111
,
0x02212010
,
0x02212112
,
0x02212211
,
0x02222110
,
0x02222111
,
0x00202020
,
0x0001ff00ffff01ff
,
0x0001ff00ff010000
,
0x0001ff0000000000
,
0x0001ff0000010001
,
0x00202022
,
0x00202220
,
0x00202222
,
0x00222020
,
0x00222022
,
0x00222220
,
0x00222222
,
0x01202121
,
0x0001ff0001ff0000
,
0x0001ff0001010100
,
0x0001ff01ff0000ff
,
0x0001ff01ff000001
,
0x01212021
,
0x01212122
,
0x01212221
,
0x01222121
,
0x02202020
,
0x02202022
,
0x02202220
,
0x02202222
,
0x0001ff0100ffffff
,
0x0001ff010001ffff
,
0x0001ff01000101ff
,
0x0001ff010100ff01
,
0x02212121
,
0x02222020
,
0x02222022
,
0x02222220
,
0x02222222
,
0x10000101
,
0x10010001
,
0x10010102
,
0x000100ffff00ffff
,
0x000100ffff00ff01
,
0x000100ffff000100
,
0x000100ff00000000
,
0x10020101
,
0x11000201
,
0x11010002
,
0x11010101
,
0x11010200
,
0x11010202
,
0x11020001
,
0x11020100
,
0x000100ff000101ff
,
0x000100ff01ff0101
,
0x000100ff0100ffff
,
0x000100ff01010101
,
0x11020102
,
0x12010100
,
0x12010201
,
0x12020001
,
0x12020102
,
0x10000010
,
0x10000011
,
0x10000110
,
0x00010000ff000000
,
0x00010000ff010100
,
0x0001000000ff0000
,
0x000100000000ff00
,
0x10000112
,
0x10000211
,
0x10010012
,
0x10010111
,
0x10010112
,
0x10010210
,
0x10010212
,
0x10020011
,
0x00010000000000ff
,
0x0001000000000000
,
0x0001000000000001
,
0x0001000000000100
,
0x10020112
,
0x10020211
,
0x11000111
,
0x11000210
,
0x11000212
,
0x11010011
,
0x11010110
,
0x11010111
,
0x0001000000010000
,
0x0001000001ffff01
,
0x0001000001000000
,
0x0001000100ff0101
,
0x11010112
,
0x11010211
,
0x11010212
,
0x11020111
,
0x11020210
,
0x11020212
,
0x12000011
,
0x12000110
,
0x0001000100000000
,
0x00010001010100ff
,
0x000101ffffff01ff
,
0x000101ffffff0101
,
0x12000112
,
0x12010010
,
0x12010012
,
0x12010111
,
0x12020010
,
0x12020011
,
0x12020012
,
0x10000121
,
0x000101ff00010000
,
0x000101ff01ff0000
,
0x000101ff0100ff01
,
0x00010100ffff0000
,
0x10010021
,
0x10010120
,
0x10010122
,
0x10020121
,
0x11000021
,
0x11010022
,
0x11010121
,
0x11010222
,
0x0001010000000000
,
0x000101000001ffff
,
0x0001010000010101
,
0x00010100010001ff
,
0x11020120
,
0x11020221
,
0x12000221
,
0x12010120
,
0x12020121
,
0x10001001
,
0x10011101
,
0x10011201
,
0x00010101ff00ff00
,
0x00010101ff010001
,
0x0001010100ffffff
,
0x0001010100ff01ff
,
0x10021201
,
0x11001101
,
0x11001200
,
0x11001202
,
0x11011001
,
0x11011100
,
0x11011101
,
0x11011102
,
0x00010101000101ff
,
0x0001010101ff0000
,
0x000101010100ff01
,
0x0001010101000101
,
0x11021001
,
0x11021002
,
0x11021101
,
0x11021200
,
0x11021202
,
0x12001001
,
0x12001102
,
0x12001201
,
0x01ffffffffff0101
,
0x01ffffffff01ffff
,
0x01ffffffff01ff01
,
0x01ffffffff0101ff
,
0x12011000
,
0x12011002
,
0x12011101
,
0x12021000
,
0x12021001
,
0x12021201
,
0x10001011
,
0x10001012
,
0x01ffffffff010101
,
0x01ffffff00000000
,
0x01ffffff01ff01ff
,
0x01ffffff01000101
,
0x10001111
,
0x10001212
,
0x10011011
,
0x10011110
,
0x10011111
,
0x10011112
,
0x10011211
,
0x10021010
,
0x01ffffff0101ff01
,
0x01ffffff010100ff
,
0x01ffff000000ff00
,
0x01ffff0000000001
,
0x10021111
,
0x10021212
,
0x11001011
,
0x11001110
,
0x11001111
,
0x11001112
,
0x11001211
,
0x11011010
,
0x01ffff00000001ff
,
0x01ffff0000010000
,
0x01ffff0001ff0000
,
0x01ffff01ffffffff
,
0x11011011
,
0x11011110
,
0x11011111
,
0x11011112
,
0x11011210
,
0x11011211
,
0x11021011
,
0x11021110
,
0x01ffff01ffff01ff
,
0x01ffff01ff000000
,
0x01ffff01ff01ffff
,
0x01ffff01ff0101ff
,
0x11021111
,
0x11021112
,
0x11021211
,
0x12001012
,
0x12001110
,
0x12001111
,
0x12001210
,
0x12011011
,
0x01ffff010100ffff
,
0x01ff00ffffff0000
,
0x01ff00ffff010000
,
0x01ff00ff00ffff01
,
0x12011110
,
0x12011111
,
0x12011112
,
0x12011211
,
0x12011212
,
0x12021111
,
0x12021210
,
0x12021212
,
0x01ff0000ff0000ff
,
0x01ff000000000000
,
0x01ff00000001ff01
,
0x01ff000001ffffff
,
0x10001021
,
0x10001121
,
0x10001221
,
0x10011120
,
0x10011121
,
0x10011220
,
0x10011222
,
0x10021021
,
0x01ff000001010100
,
0x01ff0001ffffff01
,
0x01ff0001ff010001
,
0x01ff000101ff0100
,
0x10021120
,
0x10021221
,
0x11001020
,
0x11001022
,
0x11001121
,
0x11001220
,
0x11011020
,
0x11011021
,
0x01ff000101000001
,
0x01ff0001010100ff
,
0x01ff01ffff00ffff
,
0x01ff01ff00010001
,
0x11011022
,
0x11011121
,
0x11011122
,
0x11011221
,
0x11021022
,
0x11021121
,
0x11021220
,
0x12001021
,
0x01ff01ff01000000
,
0x01ff01ff010101ff
,
0x01ff0100ff000001
,
0x01ff010000ffff00
,
0x12001121
,
0x12001222
,
0x12011120
,
0x12011121
,
0x12021021
,
0x12021120
,
0x12021122
,
0x10002101
,
0x01ff010000000100
,
0x01ff010001ff01ff
,
0x01ff01000101ffff
,
0x01ff0101ffff00ff
,
0x10012001
,
0x10012101
,
0x10012202
,
0x10022101
,
0x11002002
,
0x11002201
,
0x11012000
,
0x11012101
,
0x01ff0101ffff0101
,
0x01ff0101ff0101ff
,
0x01ff010100010000
,
0x0100ffff00ff00ff
,
0x11012200
,
0x11022001
,
0x11022100
,
0x11022102
,
0x11022201
,
0x12002101
,
0x12012001
,
0x12012100
,
0x0100ffff00ff0001
,
0x0100ffff00000100
,
0x0100ffff0100ff00
,
0x0100ff00ffff0000
,
0x12012102
,
0x12012201
,
0x12022101
,
0x10002011
,
0x10002111
,
0x10002112
,
0x10002212
,
0x10012010
,
0x0100ff00ff00ffff
,
0x0100ff00ff00ff01
,
0x0100ff00ff000100
,
0x0100ff00ff010000
,
0x10012110
,
0x10012111
,
0x10012210
,
0x10022011
,
0x10022110
,
0x10022112
,
0x11002010
,
0x11002111
,
0x0100ff0000000000
,
0x0100ff00000100ff
,
0x0100ff0001ff0101
,
0x0100ff0001010101
,
0x11002212
,
0x11012011
,
0x11012012
,
0x11012110
,
0x11012111
,
0x11012112
,
0x11012211
,
0x11022010
,
0x0100ff0100ff00ff
,
0x0100ff0100ff0001
,
0x0100ff0100000100
,
0x0100ff0100010001
,
0x11022012
,
0x11022111
,
0x11022112
,
0x11022212
,
0x12002112
,
0x12002211
,
0x12012012
,
0x12012111
,
0x0100ff0101000000
,
0x010000ffff00ff00
,
0x010000ff0000ffff
,
0x010000ff00000000
,
0x12012112
,
0x12012210
,
0x12022011
,
0x12022110
,
0x12022112
,
0x12022211
,
0x10012122
,
0x11002120
,
0x010000ff010001ff
,
0x010000ff01010001
,
0x01000000ffffff00
,
0x01000000ffff0101
,
0x11002122
,
0x11002221
,
0x11012121
,
0x11012220
,
0x11012222
,
0x11022120
,
0x11022221
,
0x12012120
,
0x01000000ff000000
,
0x01000000ff0100ff
,
0x01000000ff010101
,
0x0100000000ff0000
,
0x12022121
,
0x10100001
,
0x10100100
,
0x10100101
,
0x10100102
,
0x10100201
,
0x10110002
,
0x10110101
,
0x010000000000ff00
,
0x01000000000000ff
,
0x0100000000000000
,
0x0100000000000001
,
0x10110202
,
0x10120001
,
0x10120100
,
0x10120201
,
0x11100000
,
0x11100101
,
0x11100200
,
0x11110001
,
0x0100000000000100
,
0x0100000000010000
,
0x0100000001000000
,
0x0100000100000000
,
0x11110100
,
0x11110101
,
0x11110102
,
0x11110201
,
0x11120101
,
0x11120200
,
0x12100102
,
0x12100201
,
0x01000001000101ff
,
0x0100000101ffff01
,
0x010001ffff000101
,
0x010001ff00ff0100
,
0x12110101
,
0x12110200
,
0x12120000
,
0x12120001
,
0x12120102
,
0x12120201
,
0x10100111
,
0x10100210
,
0x010001ff0000ff00
,
0x010001ff000100ff
,
0x010001ff01ffffff
,
0x01000100ffff0000
,
0x10100211
,
0x10100212
,
0x10110011
,
0x10110110
,
0x10110111
,
0x10110112
,
0x10110210
,
0x10110211
,
0x01000100ff0001ff
,
0x0100010000000000
,
0x010001000001ff00
,
0x0100010001ff0000
,
0x10120010
,
0x10120111
,
0x10120112
,
0x10120210
,
0x10120212
,
0x11100011
,
0x11100110
,
0x11100111
,
0x01000100010000ff
,
0x0100010001000101
,
0x01000101ff00ff01
,
0x0100010100ff0100
,
0x11100112
,
0x11100211
,
0x11110010
,
0x11110011
,
0x11110012
,
0x11110110
,
0x11110111
,
0x11110112
,
0x010001010000ffff
,
0x0100010101010001
,
0x0101ffffffff0101
,
0x0101ffffff0001ff
,
0x11110210
,
0x11110211
,
0x11110212
,
0x11120011
,
0x11120110
,
0x11120111
,
0x11120112
,
0x11120211
,
0x0101ffffff01ffff
,
0x0101ffffff010101
,
0x0101ffff00000000
,
0x0101ffff0101ffff
,
0x12100012
,
0x12100111
,
0x12110011
,
0x12110110
,
0x12110111
,
0x12110112
,
0x12110211
,
0x12120010
,
0x0101ffff010101ff
,
0x0101ff00ff000000
,
0x0101ff0000ff0100
,
0x0101ff000000ff00
,
0x12120111
,
0x12120212
,
0x10100021
,
0x10100122
,
0x10110022
,
0x10110121
,
0x10110222
,
0x10120021
,
0x0101ff0000010000
,
0x0101ff00010000ff
,
0x0101ff0001000001
,
0x0101ff01ff010101
,
0x10120120
,
0x11100022
,
0x11100121
,
0x11100222
,
0x11110021
,
0x11110120
,
0x11110121
,
0x11110122
,
0x0101ff0100000000
,
0x0101ff010101ff00
,
0x010100ffffff0000
,
0x010100ffff010000
,
0x11110221
,
0x11120022
,
0x11120121
,
0x12100121
,
0x12110020
,
0x12110022
,
0x12110121
,
0x12110221
,
0x010100ff00ff01ff
,
0x010100ff000000ff
,
0x010100ff00000101
,
0x010100ff01ffff00
,
0x12110222
,
0x12120120
,
0x10101100
,
0x10101101
,
0x10111001
,
0x10111100
,
0x10111101
,
0x10111102
,
0x01010000ffffff01
,
0x01010000ff000100
,
0x01010000ff01ff01
,
0x0101000000000000
,
0x10111200
,
0x10111201
,
0x10121001
,
0x10121101
,
0x10121200
,
0x10121202
,
0x11101001
,
0x11101100
,
0x01010000000100ff
,
0x010100000101ff01
,
0x01010001ffff0000
,
0x01010001ff00ffff
,
0x11101101
,
0x11101102
,
0x11101201
,
0x11101202
,
0x11111000
,
0x11111001
,
0x11111100
,
0x11111101
,
0x01010001ff010000
,
0x0101000101ffffff
,
0x0101000101ff01ff
,
0x0101000101010101
,
0x11111102
,
0x11111200
,
0x11111201
,
0x11111202
,
0x11121001
,
0x11121002
,
0x11121100
,
0x11121101
,
0x010101ffff01ffff
,
0x010101ff00000000
,
0x010101ff0001ff01
,
0x010101ff0101ffff
,
0x11121102
,
0x11121201
,
0x12101000
,
0x12101200
,
0x12101202
,
0x12111001
,
0x12111100
,
0x12111101
,
0x010101ff010101ff
,
0x01010100ffffffff
,
0x01010100ff000001
,
0x010101000000ff00
,
0x12111102
,
0x12111201
,
0x12121001
,
0x12121100
,
0x12121101
,
0x12121202
,
0x10101011
,
0x10101012
,
0x0101010001010000
,
0x0101010100ff0001
,
0x010101010001ff01
,
0x010101010101ffff
,
0x10101110
,
0x10101111
,
0x10101112
,
0x10101211
,
0x10111010
,
0x10111011
,
0x10111012
,
0x10111110
,
0x10111111
,
0x10111112
,
0x10111211
,
0x10111212
,
0x10121011
,
0x10121110
,
0x10121111
,
0x10121112
,
0x10121211
,
0x11101010
,
0x11101011
,
0x11101012
,
0x11101110
,
0x11101111
,
0x11101112
,
0x11101210
,
0x11101211
,
0x11111010
,
0x11111011
,
0x11111012
,
0x11111110
,
0x11111111
,
0x11111112
,
0x11111210
,
0x11111211
,
0x11111212
,
0x11121010
,
0x11121011
,
0x11121110
,
0x11121111
,
0x11121112
,
0x11121210
,
0x11121211
,
0x11121212
,
0x12101011
,
0x12101110
,
0x12101111
,
0x12101211
,
0x12101212
,
0x12111010
,
0x12111011
,
0x12111110
,
0x12111111
,
0x12111112
,
0x12111210
,
0x12111211
,
0x12121011
,
0x12121110
,
0x12121111
,
0x12121112
,
0x12121211
,
0x10101020
,
0x10101021
,
0x10101022
,
0x10101120
,
0x10101122
,
0x10101220
,
0x10101221
,
0x10111021
,
0x10111120
,
0x10111121
,
0x10111220
,
0x10111221
,
0x10121020
,
0x10121021
,
0x10121022
,
0x10121120
,
0x10121121
,
0x10121122
,
0x10121220
,
0x10121221
,
0x11101021
,
0x11101121
,
0x11101122
,
0x11101220
,
0x11101221
,
0x11101222
,
0x11111020
,
0x11111021
,
0x11111022
,
0x11111120
,
0x11111121
,
0x11111122
,
0x11111220
,
0x11111221
,
0x11111222
,
0x11121021
,
0x11121120
,
0x11121121
,
0x11121221
,
0x12101022
,
0x12101121
,
0x12101122
,
0x12101220
,
0x12101221
,
0x12101222
,
0x12111021
,
0x12111121
,
0x12111222
,
0x12121022
,
0x12121121
,
0x12121122
,
0x12121220
,
0x12121221
,
0x10102100
,
0x10102101
,
0x10102102
,
0x10102201
,
0x10112000
,
0x10112101
,
0x10112200
,
0x10122001
,
0x10122202
,
0x11102101
,
0x11102200
,
0x11102202
,
0x11112001
,
0x11112100
,
0x11112101
,
0x11112102
,
0x11112200
,
0x11112201
,
0x11122000
,
0x11122002
,
0x11122100
,
0x11122101
,
0x12102002
,
0x12102201
,
0x12112000
,
0x12112002
,
0x12112101
,
0x12112200
,
0x12122001
,
0x12122201
,
0x10102011
,
0x10102012
,
0x10102111
,
0x10102212
,
0x10112011
,
0x10112110
,
0x10112111
,
0x10112112
,
0x10112211
,
0x10122111
,
0x11102011
,
0x11102110
,
0x11102111
,
0x11102112
,
0x11102211
,
0x11112010
,
0x11112011
,
0x11112012
,
0x11112110
,
0x11112111
,
0x11112112
,
0x11112210
,
0x11112211
,
0x11112212
,
0x11122011
,
0x11122110
,
0x11122111
,
0x11122112
,
0x11122211
,
0x12102011
,
0x12102111
,
0x12102211
,
0x12112011
,
0x12112110
,
0x12112111
,
0x12112112
,
0x12112210
,
0x12112211
,
0x12122111
,
0x10102120
,
0x10102220
,
0x10112121
,
0x10112222
,
0x10122020
,
0x10122121
,
0x10122122
,
0x10122221
,
0x11102121
,
0x11102220
,
0x11102221
,
0x11112021
,
0x11112121
,
0x11112122
,
0x11112220
,
0x11112221
,
0x11122022
,
0x11122121
,
0x11122220
,
0x11122222
,
0x12102021
,
0x12102222
,
0x12112022
,
0x12112121
,
0x12112122
,
0x12112220
,
0x12112222
,
0x12122021
,
0x10200101
,
0x10210100
,
0x10210102
,
0x10210201
,
0x10220101
,
0x11200100
,
0x11210000
,
0x11210101
,
0x11210102
,
0x11210200
,
0x11210202
,
0x11220001
,
0x11220100
,
0x11220102
,
0x11220201
,
0x12200001
,
0x12210102
,
0x12220101
,
0x10200011
,
0x10200110
,
0x10200112
,
0x10200211
,
0x10210012
,
0x10210111
,
0x10220011
,
0x10220012
,
0x10220112
,
0x10220211
,
0x11200111
,
0x11200211
,
0x11210011
,
0x11210111
,
0x11210112
,
0x11210211
,
0x11220111
,
0x11220112
,
0x11220212
,
0x12200110
,
0x12200212
,
0x12210012
,
0x12210111
,
0x12220011
,
0x12220112
,
0x12220211
,
0x10210021
,
0x10210122
,
0x10210221
,
0x11200020
,
0x11200021
,
0x11200122
,
0x11210121
,
0x11210122
,
0x11210220
,
0x11220020
,
0x12200121
,
0x12210021
,
0x12210122
,
0x12220121
,
0x10211001
,
0x10211002
,
0x10211101
,
0x10211102
,
0x10211202
,
0x10221001
,
0x10221102
,
0x10221201
,
0x11201000
,
0x11201002
,
0x11201101
,
0x11201200
,
0x11201202
,
0x11211001
,
0x11211100
,
0x11211101
,
0x11211102
,
0x11211201
,
0x11211202
,
0x11221000
,
0x11221002
,
0x11221101
,
0x12201100
,
0x12201101
,
0x12201201
,
0x12211000
,
0x12211002
,
0x12211100
,
0x12211101
,
0x12211102
,
0x12211200
,
0x12211202
,
0x12221001
,
0x12221100
,
0x12221201
,
0x10201111
,
0x10201210
,
0x10201212
,
0x10211011
,
0x10211111
,
0x10211112
,
0x10211211
,
0x11201110
,
0x11201111
,
0x11201112
,
0x11201211
,
0x11211010
,
0x11211011
,
0x11211110
,
0x11211111
,
0x11211112
,
0x11211211
,
0x11221011
,
0x11221110
,
0x11221111
,
0x11221112
,
0x11221211
,
0x12201112
,
0x12201211
,
0x12201212
,
0x12211011
,
0x12211111
,
0x12211112
,
0x12211211
,
0x12211212
,
0x12221012
,
0x12221111
,
0x12221112
,
0x12221210
,
0x10201022
,
0x10201221
,
0x10211121
,
0x10221020
,
0x10221122
,
0x10221220
,
0x10221221
,
0x11201020
,
0x11201121
,
0x11201220
,
0x11201222
,
0x11211021
,
0x11211120
,
0x11211121
,
0x11211122
,
0x11211220
,
0x11211222
,
0x11221020
,
0x11221121
,
0x11221220
,
0x12201020
,
0x12201022
,
0x12201121
,
0x12201222
,
0x12211120
,
0x12211122
,
0x12211220
,
0x12211221
,
0x12221020
,
0x12221120
,
0x12221122
,
0x12221222
,
0x10212102
,
0x10212201
,
0x10222101
,
0x11202001
,
0x11212002
,
0x11212101
,
0x11212202
,
0x11222001
,
0x11222201
,
0x12202101
,
0x12212001
,
0x12212200
,
0x12222102
,
0x10202011
,
0x10202110
,
0x10212010
,
0x10212111
,
0x10222011
,
0x10222110
,
0x10222112
,
0x10222211
,
0x11202010
,
0x11202011
,
0x11202111
,
0x11202112
,
0x11202210
,
0x11212011
,
0x11212110
,
0x11212111
,
0x11212112
,
0x11212211
,
0x11222010
,
0x11222111
,
0x11222212
,
0x12202012
,
0x12202110
,
0x12202212
,
0x12212111
,
0x12222011
,
0x12222110
,
0x12222111
,
0x12222211
,
0x10212021
,
0x10212122
,
0x10212220
,
0x11202021
,
0x11202120
,
0x11202221
,
0x11212020
,
0x11212121
,
0x11212220
,
0x11212222
,
0x11222120
,
0x11222121
,
0x11222221
,
0x12202122
,
0x12212120
,
0x12212220
,
0x12212222
,
0x12222122
,
0x20000000
,
0x20000002
,
0x20000200
,
0x20000202
,
0x20020000
,
0x20020002
,
0x20020200
,
0x20020202
,
0x21000101
,
0x21010000
,
0x21010001
,
0x21010100
,
0x21010102
,
0x21010201
,
0x21020101
,
0x22000000
,
0x22000002
,
0x22000200
,
0x22000202
,
0x22010101
,
0x22020000
,
0x22020002
,
0x22020200
,
0x22020202
,
0x20000111
,
0x20010011
,
0x20010110
,
0x20010112
,
0x20010211
,
0x20020111
,
0x21000011
,
0x21000110
,
0x21000211
,
0x21010010
,
0x21010012
,
0x21010111
,
0x21010112
,
0x21010210
,
0x21010211
,
0x21020110
,
0x21020112
,
0x21020211
,
0x22000111
,
0x22000211
,
0x22010110
,
0x22010112
,
0x22010211
,
0x22020111
,
0x20000020
,
0x20000022
,
0x20000220
,
0x20000222
,
0x20010121
,
0x20020020
,
0x20020022
,
0x20020220
,
0x20020222
,
0x21010021
,
0x21010120
,
0x21010221
,
0x21020121
,
0x22000020
,
0x22000022
,
0x22000220
,
0x22000222
,
0x22010121
,
0x22020020
,
0x22020022
,
0x22020220
,
0x22020222
,
0x20011100
,
0x20011201
,
0x21001001
,
0x21001100
,
0x21011001
,
0x21011101
,
0x21011202
,
0x21021001
,
0x21021100
,
0x21021201
,
0x22011100
,
0x22011201
,
0x20001011
,
0x20001211
,
0x20011012
,
0x20011111
,
0x20011212
,
0x20021112
,
0x20021211
,
0x21001010
,
0x21001011
,
0x21001111
,
0x21001210
,
0x21011011
,
0x21011110
,
0x21011111
,
0x21011112
,
0x21011211
,
0x21011212
,
0x21021111
,
0x21021112
,
0x21021210
,
0x21021212
,
0x22001011
,
0x22001110
,
0x22001112
,
0x22001211
,
0x22011010
,
0x22011012
,
0x22011111
,
0x22011210
,
0x22021112
,
0x20011021
,
0x20011122
,
0x20011221
,
0x20021121
,
0x21001021
,
0x21001120
,
0x21001221
,
0x21001222
,
0x21011020
,
0x21011121
,
0x21011221
,
0x21011222
,
0x21021021
,
0x21021122
,
0x21021222
,
0x22001121
,
0x22011021
,
0x22011222
,
0x22021120
,
0x20002000
,
0x20002002
,
0x20002200
,
0x20002202
,
0x20012101
,
0x20022000
,
0x20022002
,
0x20022200
,
0x20022202
,
0x21002001
,
0x21002101
,
0x21012001
,
0x21012100
,
0x21012201
,
0x21022101
,
0x21022201
,
0x22002000
,
0x22002002
,
0x22002200
,
0x22002202
,
0x22012101
,
0x22022000
,
0x22022002
,
0x22022200
,
0x22022202
,
0x20002111
,
0x20002112
,
0x20012011
,
0x20012110
,
0x20012112
,
0x20022111
,
0x21002011
,
0x21002110
,
0x21002112
,
0x21002211
,
0x21012010
,
0x21012012
,
0x21012111
,
0x21012212
,
0x21022011
,
0x21022110
,
0x22002111
,
0x22012112
,
0x22012211
,
0x22022111
,
0x20002020
,
0x20002022
,
0x20002220
,
0x20002222
,
0x20012121
,
0x20022020
,
0x20022022
,
0x20022220
,
0x20022222
,
0x21002121
,
0x21012021
,
0x21012120
,
0x21012122
,
0x22002020
,
0x22002022
,
0x22002220
,
0x22002222
,
0x22012121
,
0x22022020
,
0x22022022
,
0x22022220
,
0x22022222
,
0x20100101
,
0x20110001
,
0x20110102
,
0x20110200
,
0x20110201
,
0x20120101
,
0x21100001
,
0x21100102
,
0x21100201
,
0x21110101
,
0x21110200
,
0x21110202
,
0x21120201
,
0x21120202
,
0x22100101
,
0x22110001
,
0x22110100
,
0x22110102
,
0x22110201
,
0x22120101
,
0x20100011
,
0x20100110
,
0x20100112
,
0x20100211
,
0x20110010
,
0x20110111
,
0x20110210
,
0x20110212
,
0x20120011
,
0x20120110
,
0x20120112
,
0x20120211
,
0x21100010
,
0x21100111
,
0x21110010
,
0x21110011
,
0x21110110
,
0x21110111
,
0x21110112
,
0x21110211
,
0x21120012
,
0x21120111
,
0x22100110
,
0x22100112
,
0x22110012
,
0x22110111
,
0x22110210
,
0x22120011
,
0x22120110
,
0x22120112
,
0x22120211
,
0x20100121
,
0x20110021
,
0x20110120
,
0x20110221
,
0x20120121
,
0x21100120
,
0x21100122
,
0x21100221
,
0x21110020
,
0x21110022
,
0x21110121
,
0x21110220
,
0x21120122
,
0x21120221
,
0x22100121
,
0x22110120
,
0x22110122
,
0x22120221
,
0x20101001
,
0x20101100
,
0x20101102
,
0x20111000
,
0x20111101
,
0x20111200
,
0x20121102
,
0x21101000
,
0x21101202
,
0x21111001
,
0x21111100
,
0x21111101
,
0x21111102
,
0x21111200
,
0x21111201
,
0x21121000
,
0x21121001
,
0x21121002
,
0x21121101
,
0x22101100
,
0x22101102
,
0x22111002
,
0x22111100
,
0x22111101
,
0x22111200
,
0x22121001
,
0x22121201
,
0x20101010
,
0x20101111
,
0x20101210
,
0x20101212
,
0x20111010
,
0x20111011
,
0x20111110
,
0x20111111
,
0x20111112
,
0x20111211
,
0x20121011
,
0x20121111
,
0x20121211
,
0x20121212
,
0x21101011
,
0x21101110
,
0x21101111
,
0x21101112
,
0x21101211
,
0x21111010
,
0x21111011
,
0x21111012
,
0x21111110
,
0x21111111
,
0x21111112
,
0x21111210
,
0x21111211
,
0x21111212
,
0x21121011
,
0x21121110
,
0x21121111
,
0x21121112
,
0x21121211
,
0x22101011
,
0x22101111
,
0x22101210
,
0x22111011
,
0x22111012
,
0x22111110
,
0x22111111
,
0x22111112
,
0x22111211
,
0x22111212
,
0x22121010
,
0x22121012
,
0x22121111
,
0x22121210
,
0x22121212
,
0x20101021
,
0x20101120
,
0x20111020
,
0x20111121
,
0x20111221
,
0x20121020
,
0x20121122
,
0x20121221
,
0x21101121
,
0x21101220
,
0x21101221
,
0x21111021
,
0x21111022
,
0x21111121
,
0x21111122
,
0x21111221
,
0x21121121
,
0x21121220
,
0x22101022
,
0x22101120
,
0x22101221
,
0x22101222
,
0x22111022
,
0x22111120
,
0x22111121
,
0x22121120
,
0x22121122
,
0x22121221
,
0x20102101
,
0x20112102
,
0x20112201
,
0x20122101
,
0x21102001
,
0x21102102
,
0x21112000
,
0x21112002
,
0x21112101
,
0x21112102
,
0x21112202
,
0x21122100
,
0x21122101
,
0x22102101
,
0x22112001
,
0x22112102
,
0x22112201
,
0x22122101
,
0x20102110
,
0x20102112
,
0x20102211
,
0x20112010
,
0x20112012
,
0x20112111
,
0x20112210
,
0x20112212
,
0x20122010
,
0x20122011
,
0x20122110
,
0x20122112
,
0x21102010
,
0x21102012
,
0x21102111
,
0x21102210
,
0x21102212
,
0x21112011
,
0x21112110
,
0x21112111
,
0x21112112
,
0x21112211
,
0x21122012
,
0x21122111
,
0x21122112
,
0x21122212
,
0x22102011
,
0x22102110
,
0x22112010
,
0x22112012
,
0x22112111
,
0x22112212
,
0x22122011
,
0x22122112
,
0x20102121
,
0x20112121
,
0x20122121
,
0x21102120
,
0x21102122
,
0x21102221
,
0x21112020
,
0x21112121
,
0x21112220
,
0x21122021
,
0x22102121
,
0x22112021
,
0x22112120
,
0x22112121
,
0x22112122
,
0x20200000
,
0x20200002
,
0x20200200
,
0x20200202
,
0x20210101
,
0x20220000
,
0x20220002
,
0x20220200
,
0x20220202
,
0x21200101
,
0x21210001
,
0x21210100
,
0x21210102
,
0x21210201
,
0x22200000
,
0x22200002
,
0x22200200
,
0x22200202
,
0x22210101
,
0x22220000
,
0x22220002
,
0x22220200
,
0x22220202
,
0x20200111
,
0x20200211
,
0x20210011
,
0x20210110
,
0x20210112
,
0x20210211
,
0x20210212
,
0x21200112
,
0x21200211
,
0x21210011
,
0x21210111
,
0x21210210
,
0x21210212
,
0x21220011
,
0x21220110
,
0x22200111
,
0x22210010
,
0x22210012
,
0x22210112
,
0x22210211
,
0x20200022
,
0x20200220
,
0x20200222
,
0x20210020
,
0x20210221
,
0x20220022
,
0x20220220
,
0x20220222
,
0x21200121
,
0x21210021
,
0x21210122
,
0x21210221
,
0x21220121
,
0x22200020
,
0x22200022
,
0x22200220
,
0x22200222
,
0x22210121
,
0x22220020
,
0x22220022
,
0x22220220
,
0x22220222
,
0x20211201
,
0x20221101
,
0x21201001
,
0x21201100
,
0x21211000
,
0x21211100
,
0x21211101
,
0x21211200
,
0x21211202
,
0x21221001
,
0x21221101
,
0x21221102
,
0x21221200
,
0x21221201
,
0x22201101
,
0x20201112
,
0x20201211
,
0x20211010
,
0x20211012
,
0x20211111
,
0x20211210
,
0x20221112
,
0x20221211
,
0x21201012
,
0x21201111
,
0x21211011
,
0x21211110
,
0x21211111
,
0x21211112
,
0x21211211
,
0x21221111
,
0x21221212
,
0x22201011
,
0x22201110
,
0x22201111
,
0x22201112
,
0x22201211
,
0x22211012
,
0x22211111
,
0x22211210
,
0x20201121
,
0x20211021
,
0x20211122
,
0x20211222
,
0x20221021
,
0x20221121
,
0x21201120
,
0x21201122
,
0x21201222
,
0x21211022
,
0x21211121
,
0x21211122
,
0x21211220
,
0x21221020
,
0x21221022
,
0x22201122
,
0x22211020
,
0x22211121
,
0x22211122
,
0x22211221
,
0x22221021
,
0x22221120
,
0x22221122
,
0x20202000
,
0x20202002
,
0x20202200
,
0x20202202
,
0x20222000
,
0x20222002
,
0x20222200
,
0x20222202
,
0x21212001
,
0x21212100
,
0x21212102
,
0x21212201
,
0x22202000
,
0x22202002
,
0x22202200
,
0x22202202
,
0x22212101
,
0x22222000
,
0x22222002
,
0x22222200
,
0x22222202
,
0x20202111
,
0x20212110
,
0x20212211
,
0x20222011
,
0x20222111
,
0x21202011
,
0x21212010
,
0x21212111
,
0x21212212
,
0x21222011
,
0x21222112
,
0x21222211
,
0x22212010
,
0x22212112
,
0x20202020
,
0x20202022
,
0x20202220
,
0x20202222
,
0x20222020
,
0x20222022
,
0x20222220
,
0x20222222
,
0x21212021
,
0x21212120
,
0x21212122
,
0x22202020
,
0x22202022
,
0x22202220
,
0x22202222
,
0x22212121
,
0x22222020
,
0x22222022
,
0x22222220
,
0x22222222
,
};
};
static
const
__device__
uint8_t
ksigns_iq2xs
[
128
]
=
{
static
const
__device__
uint8_t
ksigns_iq2xs
[
128
]
=
{
...
...
csrc/quantization/gguf/gguf_kernel.cu
View file @
fc990f97
...
@@ -166,6 +166,11 @@ torch::Tensor ggml_mul_mat_vec_a8(torch::Tensor W, // quant weight
...
@@ -166,6 +166,11 @@ torch::Tensor ggml_mul_mat_vec_a8(torch::Tensor W, // quant weight
(
void
*
)
quant_X
.
data_ptr
(),
(
void
*
)
quant_X
.
data_ptr
(),
(
half
*
)
Y
.
data_ptr
(),
col
,
row
,
stream
);
(
half
*
)
Y
.
data_ptr
(),
col
,
row
,
stream
);
break
;
break
;
case
29
:
mul_mat_vec_iq1_m_q8_1_cuda
((
void
*
)
W
.
data_ptr
(),
(
void
*
)
quant_X
.
data_ptr
(),
(
half
*
)
Y
.
data_ptr
(),
col
,
row
,
stream
);
break
;
}
}
return
Y
;
return
Y
;
}
}
...
...
csrc/quantization/gguf/mmvq.cuh
View file @
fc990f97
...
@@ -157,6 +157,14 @@ static void mul_mat_vec_iq1_s_q8_1_cuda(const void * vx, const void * vy, half *
...
@@ -157,6 +157,14 @@ static void mul_mat_vec_iq1_s_q8_1_cuda(const void * vx, const void * vy, half *
<<<
block_nums
,
block_dims
,
0
,
stream
>>>
(
vx
,
vy
,
dst
,
ncols
,
nrows
);
<<<
block_nums
,
block_dims
,
0
,
stream
>>>
(
vx
,
vy
,
dst
,
ncols
,
nrows
);
}
}
static
void
mul_mat_vec_iq1_m_q8_1_cuda
(
const
void
*
vx
,
const
void
*
vy
,
half
*
dst
,
const
int
ncols
,
const
int
nrows
,
cudaStream_t
stream
)
{
const
int
block_num_y
=
(
nrows
+
GGML_CUDA_MMV_Y
-
1
)
/
GGML_CUDA_MMV_Y
;
const
dim3
block_nums
(
block_num_y
,
1
,
1
);
const
dim3
block_dims
(
WARP_SIZE
,
GGML_CUDA_MMV_Y
,
1
);
mul_mat_vec_q
<
QK_K
,
QI1_M
,
block_iq1_m
,
1
,
vec_dot_iq1_m_q8_1
>
<<<
block_nums
,
block_dims
,
0
,
stream
>>>
(
vx
,
vy
,
dst
,
ncols
,
nrows
);
}
static
void
mul_mat_vec_iq4_nl_q8_1_cuda
(
const
void
*
vx
,
const
void
*
vy
,
half
*
dst
,
const
int
ncols
,
const
int
nrows
,
cudaStream_t
stream
)
{
static
void
mul_mat_vec_iq4_nl_q8_1_cuda
(
const
void
*
vx
,
const
void
*
vy
,
half
*
dst
,
const
int
ncols
,
const
int
nrows
,
cudaStream_t
stream
)
{
const
int
block_num_y
=
(
nrows
+
GGML_CUDA_MMV_Y
-
1
)
/
GGML_CUDA_MMV_Y
;
const
int
block_num_y
=
(
nrows
+
GGML_CUDA_MMV_Y
-
1
)
/
GGML_CUDA_MMV_Y
;
const
dim3
block_nums
(
block_num_y
,
1
,
1
);
const
dim3
block_nums
(
block_num_y
,
1
,
1
);
...
...
csrc/quantization/gguf/vecdotq.cuh
View file @
fc990f97
// copied and adapted from https://github.com/ggerganov/llama.cpp/blob/b2899/ggml-cuda/vecdotq.cuh
// copied and adapted from https://github.com/ggerganov/llama.cpp/blob/b2899/ggml-cuda/vecdotq.cuh
// and https://github.com/ggerganov/llama.cpp/blob/b2899/ggml-cuda/mmq.cu
// and https://github.com/ggerganov/llama.cpp/blob/b2899/ggml-cuda/mmq.cu
// Reads the i32-th 32-bit word from a buffer that is only guaranteed to be
// 2-byte aligned, by combining two consecutive 16-bit loads.
// The first half-word becomes the low 16 bits, the second the high 16 bits.
static __device__ __forceinline__ int get_int_b2(const void * x, const int & i32) {
    const uint16_t * halves = (const uint16_t *) x; // assume at least 2 byte alignment

    const int lo = halves[2*i32 + 0];
    const int hi = halves[2*i32 + 1];

    return (lo << 0) | (hi << 16);
}
// Reads the i32-th 32-bit word from a buffer with a single aligned load.
static __device__ __forceinline__ int get_int_b4(const void * x, const int & i32) {
    const int * words = (const int *) x; // assume at least 4 byte alignment
    return words[i32];
}
static
__device__
__forceinline__
int
get_int_from_int8
(
const
int8_t
*
x8
,
const
int
&
i32
)
{
static
__device__
__forceinline__
int
get_int_from_int8
(
const
int8_t
*
x8
,
const
int
&
i32
)
{
const
uint16_t
*
x16
=
(
const
uint16_t
*
)
(
x8
+
sizeof
(
int
)
*
i32
);
// assume at least 2 byte alignment
const
uint16_t
*
x16
=
(
const
uint16_t
*
)
(
x8
+
sizeof
(
int
)
*
i32
);
// assume at least 2 byte alignment
int
x32
=
0
;
int
x32
=
0
;
...
@@ -1658,28 +1671,76 @@ static __device__ __forceinline__ float vec_dot_iq3_s_q8_1(
...
@@ -1658,28 +1671,76 @@ static __device__ __forceinline__ float vec_dot_iq3_s_q8_1(
static
__device__
__forceinline__
float
vec_dot_iq1_s_q8_1
(
static
__device__
__forceinline__
float
vec_dot_iq1_s_q8_1
(
const
void
*
__restrict__
vbq
,
const
block_q8_1
*
__restrict__
bq8_1
,
const
int
&
iqs
)
{
const
void
*
__restrict__
vbq
,
const
block_q8_1
*
__restrict__
bq8_1
,
const
int
&
iqs
)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 610
const
block_iq1_s
*
bq1
=
(
const
block_iq1_s
*
)
vbq
;
const
block_iq1_s
*
bq1
=
(
const
block_iq1_s
*
)
vbq
;
const
int
ib32
=
iqs
;
const
int
qs_packed
=
get_int_b2
(
bq1
->
qs
,
iqs
);
int
sumi1
=
0
,
sumi2
=
0
,
sumi3
=
0
,
sumi4
=
0
;
const
uint8_t
*
qs
=
(
const
uint8_t
*
)
&
qs_packed
;
const
uint8_t
h1
=
bq1
->
scales
[
2
*
ib32
+
0
];
const
uint8_t
h2
=
bq1
->
scales
[
2
*
ib32
+
1
];
const
int
qh
=
bq1
->
qh
[
iqs
];
const
int
*
q8
=
(
const
int
*
)
bq8_1
[
ib32
].
qs
;
const
int
*
grid1
=
(
const
int
*
)(
iq1s_grid
+
(
bq1
->
qs
[
4
*
ib32
+
0
]
|
((
h1
&
0x08
)
<<
5
)));
int
sumi
=
0
;
const
int
*
grid2
=
(
const
int
*
)(
iq1s_grid
+
(
bq1
->
qs
[
4
*
ib32
+
1
]
|
((
h1
&
0x80
)
<<
1
)));
#pragma unroll
const
int
*
grid3
=
(
const
int
*
)(
iq1s_grid
+
(
bq1
->
qs
[
4
*
ib32
+
2
]
|
((
h2
&
0x08
)
<<
5
)));
for
(
int
l0
=
0
;
l0
<
8
;
l0
+=
2
)
{
const
int
*
grid4
=
(
const
int
*
)(
iq1s_grid
+
(
bq1
->
qs
[
4
*
ib32
+
3
]
|
((
h2
&
0x80
)
<<
1
)));
const
int
grid
=
iq1s_grid_gpu
[
qs
[
l0
/
2
]
|
(((
qh
>>
3
*
(
l0
/
2
))
&
0x07
)
<<
8
)];
for
(
int
j
=
0
;
j
<
2
;
++
j
)
{
sumi1
=
__dp4a
(
q8
[
j
+
0
],
grid1
[
j
],
sumi1
);
const
int
grid0
=
(
grid
>>
0
)
&
0x0F0F0F0F
;
sumi2
=
__dp4a
(
q8
[
j
+
2
],
grid2
[
j
],
sumi2
);
const
int
grid1
=
(
grid
>>
4
)
&
0x0F0F0F0F
;
sumi3
=
__dp4a
(
q8
[
j
+
4
],
grid3
[
j
],
sumi3
);
sumi4
=
__dp4a
(
q8
[
j
+
6
],
grid4
[
j
],
sumi4
);
const
int
u0
=
get_int_b4
(
bq8_1
[
iqs
].
qs
,
l0
+
0
);
}
const
int
u1
=
get_int_b4
(
bq8_1
[
iqs
].
qs
,
l0
+
1
);
const
float
d
=
__half2float
(
bq1
->
d
)
*
__low2float
(
bq8_1
[
ib32
].
ds
);
return
d
*
(
sumi1
*
(
2
*
(
h1
&
7
)
+
1
)
+
sumi2
*
(
2
*
((
h1
>>
4
)
&
7
)
+
1
)
+
sumi
=
__dp4a
(
grid0
,
u0
,
sumi
);
sumi3
*
(
2
*
(
h2
&
7
)
+
1
)
+
sumi4
*
(
2
*
((
h2
>>
4
)
&
7
)
+
1
));
sumi
=
__dp4a
(
grid1
,
u1
,
sumi
);
#endif
}
const
float
d1q
=
__half2float
(
bq1
->
d
)
*
(((
qh
>>
11
)
&
0x0E
)
+
1
);
const
float
delta
=
-
1.0
f
+
IQ1S_DELTA
-
(
qh
&
0x8000
)
*
(
2.0
f
*
IQ1S_DELTA
/
0x8000
);
const
float2
ds
=
__half22float2
(
bq8_1
[
iqs
].
ds
);
return
d1q
*
(
ds
.
x
*
sumi
+
ds
.
y
*
delta
);
}
// Partial dot product between the IQ1_M block at vbq and the q8_1 block
// bq8_1[iqs], covering the slice of the IQ1_M block selected by iqs.
// NOTE(review): iqs appears to index a 32-value slice of the block - confirm
// against the mul_mat_vec_q caller.
static __device__ __forceinline__ float vec_dot_iq1_m_q8_1(
    const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) {

    const block_iq1_m * blk = (const block_iq1_m *) vbq;

    // One 32-bit load yields four bytes; each byte is the low 8 bits of a grid index.
    const int packed_lo = get_int_b4(blk->qs, iqs);
    const uint8_t * idx_lo = (const uint8_t *) &packed_lo;

    int   isum[2] = {0, 0};        // integer dot products, one per scale
    float fsum[2] = {0.0f, 0.0f};  // delta corrections, one per scale

#pragma unroll
    for (int g = 0; g < 4; ++g) {
        const int pair = g / 2;  // accumulator (and qh entry) fed by this group
        const int l0   = 2 * g;  // first of the two q8 words consumed by this group

        // Each qh entry is read as two 4-bit fields; pick the one for this group.
        const int qhl = blk->qh[2*iqs + pair] >> (4 * (g % 2));

        // Bits 0..2 of the field extend the grid index from 8 to 11 bits.
        const int grid = iq1s_grid_gpu[idx_lo[g] | ((qhl & 0x07) << 8)];

        // Split the grid entry into its low and high nibbles, four bytes each.
        const int grid_lo = (grid >> 0) & 0x0F0F0F0F;
        const int grid_hi = (grid >> 4) & 0x0F0F0F0F;

        const int u0 = get_int_b4(bq8_1[iqs].qs, l0 + 0);
        const int u1 = get_int_b4(bq8_1[iqs].qs, l0 + 1);

        isum[pair] = __dp4a(grid_lo, u0, isum[pair]);
        isum[pair] = __dp4a(grid_hi, u1, isum[pair]);

        // Bit 3 of the field selects the offset: -1 + IQ1M_DELTA when clear,
        // -1 - IQ1M_DELTA when set.
        const float delta = -1.0f + IQ1M_DELTA - (qhl & 0x08) * (2.0f*IQ1M_DELTA/0x08);

        // Dotting with a vector of ones sums the eight bytes held in u0 and u1.
        const int sumy = __dp4a(u1, 0x01010101, __dp4a(u0, 0x01010101, 0));
        fsum[pair] += delta*sumy;
    }

    const uint16_t * sc16 = (const uint16_t *) blk->scales;

    // The fp16 block scale is reassembled from the top nibble of sc16[0..3].
    iq1m_scale_t block_scale;
    block_scale.u16 = (sc16[0] >> 12) | ((sc16[1] >> 8) & 0x00F0) | ((sc16[2] >> 4) & 0x0F00) | (sc16[3] & 0xF000);
    const float d = __half2float(block_scale.f16) * __low2float(bq8_1[iqs].ds);

    // Two 3-bit scales per slice; an odd iqs reads the pair that starts at bit 6.
    const int sc_bits = sc16[iqs/2] >> (6*(iqs%2));
    const int sc0 = 2*((sc_bits >> 0) & 0x07) + 1;
    const int sc1 = 2*((sc_bits >> 3) & 0x07) + 1;

    return d * ((isum[0] + fsum[0]) * sc0 + (isum[1] + fsum[1]) * sc1);
}
}
static
__device__
__forceinline__
void
get_int_from_table_16
(
const
uint32_t
&
q4
,
const
uint8_t
*
values
,
static
__device__
__forceinline__
void
get_int_from_table_16
(
const
uint32_t
&
q4
,
const
uint8_t
*
values
,
...
...
requirements-common.txt
View file @
fc990f97
...
@@ -24,7 +24,7 @@ filelock >= 3.10.4 # filelock starts to support `mode` argument from 3.10.4
...
@@ -24,7 +24,7 @@ filelock >= 3.10.4 # filelock starts to support `mode` argument from 3.10.4
partial-json-parser # used for parsing partial JSON outputs
partial-json-parser # used for parsing partial JSON outputs
pyzmq
pyzmq
msgspec
msgspec
gguf == 0.
9.1
gguf == 0.
10.0
importlib_metadata
importlib_metadata
mistral_common >= 1.4.0
mistral_common >= 1.4.0
pyyaml
pyyaml
...
...
tests/kernels/test_gguf.py
0 → 100644
View file @
fc990f97
from
pathlib
import
Path
from
typing
import
List
import
pytest
import
torch
from
gguf
import
GGMLQuantizationType
,
GGUFReader
,
ReaderTensor
,
dequantize
from
huggingface_hub
import
snapshot_download
import
vllm._custom_ops
as
ops
GGUF_SAMPLE
=
snapshot_download
(
"Isotr0py/test-gguf-sample"
)
def get_gguf_sample_tensors(
        hidden_size: int,
        quant_type: GGMLQuantizationType) -> List[ReaderTensor]:
    """Return the tensors stored in one sample GGUF file.

    The file is looked up inside the downloaded sample snapshot
    (``GGUF_SAMPLE``) under the name
    ``Quant_<quant type name>_<hidden_size>.gguf``.
    """
    sample_path = Path(
        GGUF_SAMPLE) / f"Quant_{quant_type.name}_{hidden_size}.gguf"
    return GGUFReader(sample_path).tensors
DTYPES
=
[
torch
.
half
]
# Hidden_size for testing, must match the sample file in HF repo,
# we have `hidden_size = 256, 1024` for test in HF repo currently.
HIDDEN_SIZES
=
[
256
,
1024
]
NUM_TOKENS
=
[
7
,
83
,
128
,
2048
]
# Arbitrary values for testing
SEEDS
=
[
0
]
QUANT_TYPES
=
[
# i-matrix
GGMLQuantizationType
.
IQ1_M
,
GGMLQuantizationType
.
IQ1_S
,
GGMLQuantizationType
.
IQ2_S
,
GGMLQuantizationType
.
IQ2_XS
,
GGMLQuantizationType
.
IQ3_S
,
GGMLQuantizationType
.
IQ3_XXS
,
GGMLQuantizationType
.
IQ4_NL
,
GGMLQuantizationType
.
IQ4_XS
,
# k-quants
GGMLQuantizationType
.
Q2_K
,
GGMLQuantizationType
.
Q3_K
,
GGMLQuantizationType
.
Q4_K
,
GGMLQuantizationType
.
Q5_K
,
GGMLQuantizationType
.
Q6_K
,
# standard quantization
GGMLQuantizationType
.
Q4_0
,
GGMLQuantizationType
.
Q5_0
,
GGMLQuantizationType
.
Q8_0
,
]
@pytest.mark.parametrize("hidden_size", HIDDEN_SIZES)
@pytest.mark.parametrize("dtype", DTYPES)
@pytest.mark.parametrize("quant_type", QUANT_TYPES)
@torch.inference_mode()
def test_dequantize(hidden_size: int, dtype: torch.dtype,
                    quant_type: GGMLQuantizationType):
    """Compare the custom dequantize op with gguf's reference `dequantize`.

    Every tensor in the sample file is dequantized both ways and the results
    must agree within ``atol=1e-2, rtol=4e-2``.
    """
    for sample in get_gguf_sample_tensors(hidden_size, quant_type):
        # The target shape is taken from the last "_"-separated piece of the
        # tensor name, whose dimensions are joined by "x".
        dim_tokens = sample.name.rsplit("_", 1)[-1].split("x")

        expected = torch.tensor(dequantize(sample.data, quant_type),
                                device="cuda").to(dtype)

        packed = torch.tensor(sample.data, device="cuda")
        actual = ops.ggml_dequantize(packed, quant_type,
                                     *[int(tok) for tok in dim_tokens])
        actual = actual.to(dtype)

        torch.testing.assert_close(actual, expected, atol=1e-2, rtol=4e-2)
@pytest.mark.parametrize("hidden_size", HIDDEN_SIZES)
@pytest.mark.parametrize("dtype", DTYPES)
@pytest.mark.parametrize("quant_type", QUANT_TYPES)
@torch.inference_mode()
def test_mmvq(hidden_size: int, dtype: torch.dtype,
              quant_type: GGMLQuantizationType):
    """Check `ggml_mul_mat_vec_a8` against a dequantize-then-matmul reference.

    A single random input row is multiplied with every tensor in the sample
    file; results must agree within ``atol=1, rtol=1e-1``.
    """
    torch.cuda.manual_seed_all(0)

    samples = get_gguf_sample_tensors(hidden_size, quant_type)
    x = torch.rand((1, hidden_size), dtype=dtype, device="cuda")

    for sample in samples:
        # Reference path: dequantize with gguf, then a plain matmul.
        dense_weight = torch.tensor(dequantize(sample.data, quant_type),
                                    device="cuda").to(dtype)
        expected = torch.matmul(x, dense_weight.T)

        # Kernel path: feed the still-quantized bytes to the custom op.
        packed_weight = torch.tensor(sample.data, device="cuda")
        num_rows = packed_weight.shape[0]
        actual = ops.ggml_mul_mat_vec_a8(packed_weight, x, quant_type,
                                         num_rows).to(dtype)

        torch.testing.assert_close(actual, expected, atol=1, rtol=1e-1)
@pytest.mark.parametrize("num_tokens", NUM_TOKENS)
@pytest.mark.parametrize("hidden_size", HIDDEN_SIZES)
@pytest.mark.parametrize("dtype", DTYPES)
@pytest.mark.parametrize(
    "quant_type",
    [
        # k-quants
        GGMLQuantizationType.Q2_K,
        GGMLQuantizationType.Q3_K,
        GGMLQuantizationType.Q4_K,
        GGMLQuantizationType.Q5_K,
        GGMLQuantizationType.Q6_K,
        # standard quants
        GGMLQuantizationType.Q4_0,
        GGMLQuantizationType.Q5_0,
        GGMLQuantizationType.Q8_0,
    ])
@torch.inference_mode()
def test_mmq(num_tokens: int, hidden_size: int, dtype: torch.dtype,
             quant_type: GGMLQuantizationType):
    """Check `ggml_mul_mat_a8` against a dequantize-then-matmul reference.

    A random ``(num_tokens, hidden_size)`` input is multiplied with every
    tensor in the sample file; results must agree within
    ``atol=1, rtol=1e-1``.
    """
    torch.cuda.manual_seed_all(0)

    samples = get_gguf_sample_tensors(hidden_size, quant_type)
    x = torch.rand((num_tokens, hidden_size), dtype=dtype, device="cuda")

    for sample in samples:
        # Reference path: dequantize with gguf, then a plain matmul.
        dense_weight = torch.tensor(dequantize(sample.data, quant_type),
                                    device="cuda").to(dtype)
        expected = torch.matmul(x, dense_weight.T)

        # Kernel path: feed the still-quantized bytes to the custom op.
        packed_weight = torch.tensor(sample.data, device="cuda")
        num_rows = packed_weight.shape[0]
        actual = ops.ggml_mul_mat_a8(packed_weight, x, quant_type,
                                     num_rows).to(dtype)

        torch.testing.assert_close(actual, expected, atol=1, rtol=1e-1)
vllm/model_executor/layers/quantization/gguf.py
View file @
fc990f97
...
@@ -55,7 +55,10 @@ class GGUFConfig(QuantizationConfig):
...
@@ -55,7 +55,10 @@ class GGUFConfig(QuantizationConfig):
def
_fuse_mul_mat
(
x
:
torch
.
Tensor
,
qweight
:
torch
.
Tensor
,
def
_fuse_mul_mat
(
x
:
torch
.
Tensor
,
qweight
:
torch
.
Tensor
,
qweight_type
:
int
)
->
torch
.
Tensor
:
qweight_type
:
int
)
->
torch
.
Tensor
:
# use dequantize mulmat for IQmatrix, mmq for k-quants
# use dequantize mulmat for IQmatrix, mmq for k-quants
if
qweight_type
>=
16
:
if
x
.
shape
[
0
]
==
1
:
# enable mmvq in contiguous batching
y
=
ops
.
ggml_mul_mat_vec_a8
(
qweight
,
x
,
qweight_type
,
qweight
.
shape
[
0
])
elif
qweight_type
>=
16
:
block_size
,
type_size
=
gguf
.
GGML_QUANT_SIZES
[
qweight_type
]
block_size
,
type_size
=
gguf
.
GGML_QUANT_SIZES
[
qweight_type
]
shape
=
(
qweight
.
shape
[
0
],
qweight
.
shape
[
1
]
//
type_size
*
block_size
)
shape
=
(
qweight
.
shape
[
0
],
qweight
.
shape
[
1
]
//
type_size
*
block_size
)
weight
=
ops
.
ggml_dequantize
(
qweight
,
qweight_type
,
*
shape
)
weight
=
ops
.
ggml_dequantize
(
qweight
,
qweight_type
,
*
shape
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment