[Minor] Rename quantization nvfp4 to modelopt_fp4 (#18356)

Signed-off-by: mgoin <mgoin64@gmail.com>

[Minor] Rename quantization nvfp4 to modelopt_fp4 (#18356)
Signed-off-by: mgoin <mgoin64@gmail.com>
f4a8a374 · Michael Goin · GitHub · 8f55962a · f4a8a374 · f4a8a374
Unverified commit f4a8a374, authored May 20, 2025 by Michael Goin; committed by GitHub on May 20, 2025.
4 changed files
--- a/tests/models/quantization/test_nvfp4.py
+++ b/tests/models/quantization/test_nvfp4.py
@@ -41,8 +41,8 @@ EXPECTED_STRS_MAP = {
    reason=
    "Prevent unstable test based on golden strings from breaking the build "
    " and test input model being too large and hanging the system.")
-@pytest.mark.skipif(not is_quant_method_supported("nvfp4"),
-                    reason="nvfp4 is not supported on this GPU type.")
+@pytest.mark.skipif(not is_quant_method_supported("modelopt_fp4"),
+                    reason="modelopt_fp4 is not supported on this GPU type.")
 @pytest.mark.parametrize("model_name", MODELS)
 def test_models(example_prompts, model_name) -> None:
    model = LLM(
@@ -50,7 +50,7 @@ def test_models(example_prompts, model_name) -> None:
        max_model_len=MAX_MODEL_LEN,
        trust_remote_code=True,
        enforce_eager=True,
-        quantization="nvfp4",
+        quantization="modelopt_fp4",
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)

--- a/vllm/config.py
+++ b/vllm/config.py
@@ -824,7 +824,7 @@ class ModelConfig:
        optimized_quantization_methods = [
            "fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin",
            "awq_marlin", "fbgemm_fp8", "compressed-tensors", "experts_int8",
-            "quark", "nvfp4", "bitblas", "gptq_bitblas"
+            "quark", "modelopt_fp4", "bitblas", "gptq_bitblas"
        ]
        if self.quantization is not None:
            self.quantization = cast(QuantizationMethods,

--- a/vllm/model_executor/layers/quantization/__init__.py
+++ b/vllm/model_executor/layers/quantization/__init__.py
@@ -14,7 +14,7 @@ QuantizationMethods = Literal[
    "ptpc_fp8",
    "fbgemm_fp8",
    "modelopt",
-    "nvfp4",
+    "modelopt_fp4",
    "marlin",
    "bitblas",
    "gguf",
@@ -120,7 +120,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
        "fp8": Fp8Config,
        "fbgemm_fp8": FBGEMMFp8Config,
        "modelopt": ModelOptFp8Config,
-        "nvfp4": ModelOptNvFp4Config,
+        "modelopt_fp4": ModelOptNvFp4Config,
        "marlin": MarlinConfig,
        "bitblas": BitBLASConfig,
        "gguf": GGUFConfig,

--- a/vllm/model_executor/layers/quantization/modelopt.py
+++ b/vllm/model_executor/layers/quantization/modelopt.py
@@ -192,7 +192,7 @@ class ModelOptNvFp4Config(QuantizationConfig):

    @classmethod
    def get_name(cls) -> QuantizationMethods:
-        return "nvfp4"
+        return "modelopt_fp4"

    @classmethod
    def get_supported_act_dtypes(cls) -> list[torch.dtype]: