Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f4a8a374
Unverified
Commit
f4a8a374
authored
May 20, 2025
by
Michael Goin
Committed by
GitHub
May 20, 2025
Browse files
[Minor] Rename quantization nvfp4 to modelopt_fp4 (#18356)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
8f55962a
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
7 additions
and
7 deletions
+7
-7
tests/models/quantization/test_nvfp4.py
tests/models/quantization/test_nvfp4.py
+3
-3
vllm/config.py
vllm/config.py
+1
-1
vllm/model_executor/layers/quantization/__init__.py
vllm/model_executor/layers/quantization/__init__.py
+2
-2
vllm/model_executor/layers/quantization/modelopt.py
vllm/model_executor/layers/quantization/modelopt.py
+1
-1
No files found.
tests/models/quantization/test_nvfp4.py
View file @
f4a8a374
...
...
@@ -41,8 +41,8 @@ EXPECTED_STRS_MAP = {
reason
=
"Prevent unstable test based on golden strings from breaking the build "
" and test input model being too large and hanging the system."
)
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"
nv
fp4"
),
reason
=
"
nv
fp4 is not supported on this GPU type."
)
@
pytest
.
mark
.
skipif
(
not
is_quant_method_supported
(
"
modelopt_
fp4"
),
reason
=
"
modelopt_
fp4 is not supported on this GPU type."
)
@
pytest
.
mark
.
parametrize
(
"model_name"
,
MODELS
)
def
test_models
(
example_prompts
,
model_name
)
->
None
:
model
=
LLM
(
...
...
@@ -50,7 +50,7 @@ def test_models(example_prompts, model_name) -> None:
max_model_len
=
MAX_MODEL_LEN
,
trust_remote_code
=
True
,
enforce_eager
=
True
,
quantization
=
"
nv
fp4"
,
quantization
=
"
modelopt_
fp4"
,
)
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
)
...
...
vllm/config.py
View file @
f4a8a374
...
...
@@ -824,7 +824,7 @@ class ModelConfig:
optimized_quantization_methods
=
[
"fp8"
,
"marlin"
,
"modelopt"
,
"gptq_marlin_24"
,
"gptq_marlin"
,
"awq_marlin"
,
"fbgemm_fp8"
,
"compressed-tensors"
,
"experts_int8"
,
"quark"
,
"
nv
fp4"
,
"bitblas"
,
"gptq_bitblas"
"quark"
,
"
modelopt_
fp4"
,
"bitblas"
,
"gptq_bitblas"
]
if
self
.
quantization
is
not
None
:
self
.
quantization
=
cast
(
QuantizationMethods
,
...
...
vllm/model_executor/layers/quantization/__init__.py
View file @
f4a8a374
...
...
@@ -14,7 +14,7 @@ QuantizationMethods = Literal[
"ptpc_fp8"
,
"fbgemm_fp8"
,
"modelopt"
,
"
nv
fp4"
,
"
modelopt_
fp4"
,
"marlin"
,
"bitblas"
,
"gguf"
,
...
...
@@ -120,7 +120,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
"fp8"
:
Fp8Config
,
"fbgemm_fp8"
:
FBGEMMFp8Config
,
"modelopt"
:
ModelOptFp8Config
,
"
nv
fp4"
:
ModelOptNvFp4Config
,
"
modelopt_
fp4"
:
ModelOptNvFp4Config
,
"marlin"
:
MarlinConfig
,
"bitblas"
:
BitBLASConfig
,
"gguf"
:
GGUFConfig
,
...
...
vllm/model_executor/layers/quantization/modelopt.py
View file @
f4a8a374
...
...
@@ -192,7 +192,7 @@ class ModelOptNvFp4Config(QuantizationConfig):
@
classmethod
def
get_name
(
cls
)
->
QuantizationMethods
:
return
"
nv
fp4"
return
"
modelopt_
fp4"
@
classmethod
def
get_supported_act_dtypes
(
cls
)
->
list
[
torch
.
dtype
]:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment