Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a60985b0
Unverified
Commit
a60985b0
authored
Mar 01, 2026
by
Jesse Cai
Committed by
GitHub
Mar 01, 2026
Browse files
Fix deprecated v1 config tests (#35327)
Signed-off-by:
Jesse Cai
<
jessecai@fb.com
>
parent
8b5014d3
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
1 addition
and
17 deletions
+1
-17
tests/quantization/test_torchao.py
tests/quantization/test_torchao.py
+1
-17
No files found.
tests/quantization/test_torchao.py
View file @
a60985b0
...
@@ -20,7 +20,7 @@ TORCHAO_AVAILABLE = importlib.util.find_spec("torchao") is not None
...
@@ -20,7 +20,7 @@ TORCHAO_AVAILABLE = importlib.util.find_spec("torchao") is not None
@
pytest
.
mark
.
skipif
(
not
TORCHAO_AVAILABLE
,
reason
=
"torchao is not available"
)
@
pytest
.
mark
.
skipif
(
not
TORCHAO_AVAILABLE
,
reason
=
"torchao is not available"
)
def
test_pre_quantized_model
(
vllm_runner
):
def
test_pre_quantized_model
(
vllm_runner
):
with
vllm_runner
(
with
vllm_runner
(
"
drisspg/fp8-opt-125m
"
,
"
torchao-testing/opt-125m-Float8WeightOnlyConfig-v2-0.15.0
"
,
quantization
=
"torchao"
,
quantization
=
"torchao"
,
dtype
=
"bfloat16"
,
dtype
=
"bfloat16"
,
enforce_eager
=
True
,
enforce_eager
=
True
,
...
@@ -52,22 +52,6 @@ def test_opt_125m_int8wo_model_loading_with_params(vllm_runner, pt_load_map_loca
...
@@ -52,22 +52,6 @@ def test_opt_125m_int8wo_model_loading_with_params(vllm_runner, pt_load_map_loca
assert
output
assert
output
@
pytest
.
mark
.
skipif
(
not
TORCHAO_AVAILABLE
,
reason
=
"torchao is not available"
)
def
test_opt_125m_int4wo_model_per_module_quant
(
vllm_runner
):
torch
.
_dynamo
.
reset
()
model_name
=
"jerryzh168/opt-125m-int4wo-per-module"
with
vllm_runner
(
model_name
=
model_name
,
quantization
=
"torchao"
,
dtype
=
"bfloat16"
,
pt_load_map_location
=
"cuda:0"
,
enforce_eager
=
True
,
)
as
llm
:
output
=
llm
.
generate_greedy
([
"The capital of France is"
],
max_tokens
=
4
)
assert
output
@
pytest
.
mark
.
skipif
(
not
TORCHAO_AVAILABLE
,
reason
=
"torchao is not available"
)
@
pytest
.
mark
.
skipif
(
not
TORCHAO_AVAILABLE
,
reason
=
"torchao is not available"
)
def
test_qwenvl_int8wo_model_loading_with_params
(
vllm_runner
):
def
test_qwenvl_int8wo_model_loading_with_params
(
vllm_runner
):
torch
.
_dynamo
.
reset
()
torch
.
_dynamo
.
reset
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment