Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a83ff278
Unverified
Commit
a83ff278
authored
Oct 09, 2025
by
Jerry Zhang
Committed by
GitHub
Oct 09, 2025
Browse files
[torchao] Add support for ModuleFqnToConfig using regex (#26001)
Signed-off-by:
Jerry Zhang
<
jerryzh168@gmail.com
>
parent
cf4cd6c2
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
38 additions
and
3 deletions
+38
-3
tests/quantization/test_torchao.py
tests/quantization/test_torchao.py
+17
-0
vllm/model_executor/layers/quantization/torchao.py
vllm/model_executor/layers/quantization/torchao.py
+21
-3
No files found.
tests/quantization/test_torchao.py
View file @
a83ff278
...
@@ -233,5 +233,22 @@ def test_opt_125m_float8_weight_only_safetensors_model_loading_with_params(vllm_
...
@@ -233,5 +233,22 @@ def test_opt_125m_float8_weight_only_safetensors_model_loading_with_params(vllm_
assert
output
assert
output
@pytest.mark.skipif(not TORCHAO_AVAILABLE, reason="torchao is not available")
@pytest.mark.skip(
    # Adjacent string literals are concatenated verbatim, so every fragment
    # that continues a sentence must end with an explicit trailing space.
    reason="since torchao nightly is only compatible with torch nightly "
    "currently https://github.com/pytorch/ao/issues/2919, we'll have to skip "
    "torchao tests that requires newer versions (0.14.0.dev+) for now"
)
def test_opt_125m_module_fqn_to_config_regex_model(vllm_runner):
    """Smoke-test loading a ModuleFqnToConfig checkpoint that uses regex keys.

    Loads the ``opt-125m-ModuleFqnToConfig-v1-regex`` test checkpoint in
    bfloat16 with weights mapped to ``cuda:0``, runs one greedy generation
    of up to 32 tokens, and checks that a non-empty result comes back.

    Args:
        vllm_runner: fixture used as a context manager that yields an object
            exposing ``generate_greedy`` (presumably the vLLM test runner
            fixture from conftest -- confirm against the test suite).
    """
    # Reset torch._dynamo state before loading the model.
    torch._dynamo.reset()
    model_name = "torchao-testing/opt-125m-ModuleFqnToConfig-v1-regex-0.14.0.dev"
    with vllm_runner(
        model_name=model_name,
        dtype="bfloat16",
        pt_load_map_location="cuda:0",
    ) as llm:
        output = llm.generate_greedy(["The capital of France is"], max_tokens=32)

    assert output
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
pytest
.
main
([
__file__
])
pytest
.
main
([
__file__
])
vllm/model_executor/layers/quantization/torchao.py
View file @
a83ff278
...
@@ -5,6 +5,7 @@ import json
...
@@ -5,6 +5,7 @@ import json
from
importlib.util
import
find_spec
from
importlib.util
import
find_spec
from
typing
import
Any
,
Optional
from
typing
import
Any
,
Optional
import
regex
as
re
import
torch
import
torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
packaging
import
version
from
packaging
import
version
...
@@ -192,9 +193,26 @@ class TorchAOConfig(QuantizationConfig):
...
@@ -192,9 +193,26 @@ class TorchAOConfig(QuantizationConfig):
module_fqn
=
prefix
module_fqn
=
prefix
if
isinstance
(
self
.
torchao_config
,
ModuleFqnToConfig
):
if
isinstance
(
self
.
torchao_config
,
ModuleFqnToConfig
):
module_fqn_to_config
=
self
.
torchao_config
.
module_fqn_to_config
module_fqn_to_config
=
self
.
torchao_config
.
module_fqn_to_config
c
=
module_fqn_to_config
.
get
(
module_fqn
)
or
module_fqn_to_config
.
get
(
c
=
None
"_default"
,
None
if
module_fqn
in
module_fqn_to_config
:
assert
not
module_fqn
.
startswith
(
"re:"
),
(
"module fqn should not start with"
"`re:`, which is used for specifying regex"
)
)
c
=
module_fqn_to_config
[
module_fqn
]
else
:
for
maybe_module_fqn_pattern
in
module_fqn_to_config
:
if
not
maybe_module_fqn_pattern
.
startswith
(
"re:"
):
continue
elif
re
.
fullmatch
(
maybe_module_fqn_pattern
[
3
:],
module_fqn
):
# we'll apply the config for first fully matched pattern
c
=
module_fqn_to_config
[
maybe_module_fqn_pattern
]
break
else
:
# fallback to use default if no module specific
# config is provided
c
=
module_fqn_to_config
.
get
(
"_default"
,
None
)
if
c
is
not
None
:
if
c
is
not
None
:
current_torchao_config
=
TorchAOConfig
(
current_torchao_config
=
TorchAOConfig
(
c
,
self
.
skip_modules
,
self
.
is_checkpoint_torchao_serialized
c
,
self
.
skip_modules
,
self
.
is_checkpoint_torchao_serialized
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment