Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3abfe221
Unverified
Commit
3abfe221
authored
Jul 01, 2025
by
czhu-cohere
Committed by
GitHub
Jul 01, 2025
Browse files
Enable group size 64 for Machete (#20290)
Signed-off-by:
czhu-cohere
<
conway.zhu@cohere.com
>
parent
e81fbefe
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
25 additions
and
8 deletions
+25
-8
tests/kernels/quantization/test_machete_mm.py
tests/kernels/quantization/test_machete_mm.py
+4
-4
vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py
...or/layers/quantization/kernels/mixed_precision/machete.py
+3
-3
vllm/model_executor/layers/quantization/utils/machete_utils.py
...model_executor/layers/quantization/utils/machete_utils.py
+18
-1
No files found.
tests/kernels/quantization/test_machete_mm.py
View file @
3abfe221
...
...
@@ -14,6 +14,8 @@ import torch
from
tests.kernels.utils
import
opcheck
from
vllm
import
_custom_ops
as
ops
from
vllm.model_executor.layers.quantization.utils.machete_utils
import
(
query_machete_supported_group_sizes
)
from
vllm.model_executor.layers.quantization.utils.quant_utils
import
(
pack_rows
,
quantize_weights
)
from
vllm.platforms
import
current_platform
...
...
@@ -46,8 +48,6 @@ MNK_SHAPES = [
(
1024
,
8192
,
4096
),
]
GROUP_SIZES_TO_TEST
:
list
[
Optional
[
int
]]
=
[
128
,
-
1
]
@
dataclass
class
TypeConfig
:
...
...
@@ -270,7 +270,7 @@ def test_machete_all_schedules(shape, types: TypeConfig):
if
types
.
group_scale_type
is
None
:
group_sizes
=
[
None
]
else
:
group_sizes
=
GROUP_SIZES_TO_TEST
group_sizes
=
query_machete_supported_group_sizes
(
types
.
act_type
)
for
group_size
in
group_sizes
:
if
not
group_size_valid
(
shape
,
group_size
):
...
...
@@ -299,7 +299,7 @@ def test_machete_heuristic(shape, types: TypeConfig):
if
types
.
group_scale_type
is
None
:
group_sizes
=
[
None
]
else
:
group_sizes
=
GROUP_SIZES_TO_TEST
group_sizes
=
query_machete_supported_group_sizes
(
types
.
act_type
)
for
group_size
in
group_sizes
:
if
not
group_size_valid
(
shape
,
group_size
):
...
...
vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py
View file @
3abfe221
...
...
@@ -8,7 +8,7 @@ import torch
from
vllm
import
_custom_ops
as
ops
from
vllm.model_executor.layers.quantization.utils.machete_utils
import
(
MACHETE_SUPPORTED_GROUP_SIZES
,
check_machete_supports_shape
,
check_machete_supports_shape
,
query_machete_supported_group_sizes
,
query_machete_supported_quant_types
)
from
vllm.model_executor.layers.quantization.utils.quant_utils
import
(
pack_quantized_values_into_int32
,
unpack_quantized_values_into_int32
)
...
...
@@ -40,10 +40,10 @@ class MacheteLinearKernel(MPLinearKernel):
"Machete, supported types are: "
\
f
"
{
query_machete_supported_quant_types
(
c
.
zero_points
)
}
"
if
c
.
group_size
not
in
MACHETE_SUPPORTED_GROUP_SIZES
:
if
c
.
group_size
not
in
query_machete_supported_group_sizes
(
c
.
act_type
)
:
return
False
,
f
"Group size (
{
c
.
group_size
}
) not supported by "
\
"Machete, supported group sizes are: "
\
f
"
{
MACHETE_SUPPORTED_GROUP_SIZES
}
"
f
"
{
query_machete_supported_group_sizes
(
c
.
act_type
)
}
"
return
check_machete_supports_shape
(
c
.
partition_weight_shape
[
0
],
c
.
partition_weight_shape
[
1
])
...
...
vllm/model_executor/layers/quantization/utils/machete_utils.py
View file @
3abfe221
...
...
@@ -7,7 +7,6 @@ import torch
from
vllm.scalar_type
import
ScalarType
,
scalar_types
MACHETE_SUPPORTED_GROUP_SIZES
=
[
-
1
,
128
]
MACHETE_PREPACKED_BLOCK_SHAPE
=
[
64
,
128
]
...
...
@@ -22,6 +21,24 @@ def query_machete_supported_act_types(zero_points: bool) -> list[ScalarType]:
return
[
torch
.
float16
,
torch
.
bfloat16
]
def query_machete_supported_group_sizes(act_type: torch.dtype) -> list[int]:
    """
    Report which quantization group sizes Machete accepts for an activation
    dtype.

    Args:
        act_type: The activation data type (e.g. torch.float16,
            torch.bfloat16).

    Returns:
        A freshly built list of supported group sizes. Each group size must
        be divisible by `TileShapeK = 128 * 8 // num_bits(act_type)`.
        -1 indicates per-channel quantization.
    """
    # Only the 16-bit float activation types additionally allow group size 64;
    # every other dtype is limited to per-channel (-1) and 128.
    is_half_precision = act_type in (torch.float16, torch.bfloat16)
    return [-1, 64, 128] if is_half_precision else [-1, 128]
def
check_machete_supports_shape
(
in_features
:
int
,
out_featrues
:
int
)
\
->
tuple
[
bool
,
Optional
[
str
]]:
if
in_features
%
MACHETE_PREPACKED_BLOCK_SHAPE
[
0
]
!=
0
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment