[AMD][FP8][BugFix] Remove V1 check in arg_utils.py for FP8 since it is not necessary (#17215)

Signed-off-by: Randall Smith <Randall.Smith@amd.com>

[AMD][FP8][BugFix] Remove V1 check in arg_utils.py for FP8 since it is not necessary (#17215)
Signed-off-by: Randall Smith <Randall.Smith@amd.com>
68af5f6c · rasmith · GitHub · 8de2901f · 68af5f6c · 68af5f6c
Unverified commit 68af5f6c, authored Apr 25, 2025 by rasmith; committed by GitHub on Apr 25, 2025
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 0 additions and 29 deletions

vllm/engine/arg_utils.py +0 -17

vllm/model_executor/layers/quantization/quark/quark.py +0 -12

No files found.
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1368,23 +1368,6 @@ class EngineArgs:
                               recommend_to_remove=False)
            return False

-        if current_platform.is_rocm():
-            from vllm.model_executor.layers.quantization.fp8 import Fp8Config
-            load_config = self.create_load_config()
-            quantization_config = VllmConfig.get_quantization_config(
-                model_config, load_config)
-            if isinstance(quantization_config, Fp8Config):
-                _raise_or_fallback(feature_name="fp8 for ROCm",
-                                   recommend_to_remove=False)
-                return False
-            from vllm.model_executor.layers.quantization.quark.quark import (
-                QuarkConfig)
-
-            if isinstance(quantization_config, QuarkConfig
-                          ) and quantization_config.has_fp8_layer_weights():
-                _raise_or_fallback(feature_name="Quark fp8 for ROCm",
-                                   recommend_to_remove=False)
-
        # No Fp8 KV cache so far.
        if self.kv_cache_dtype != "auto":
            fp8_attention = self.kv_cache_dtype.startswith("fp8")

--- a/vllm/model_executor/layers/quantization/quark/quark.py
+++ b/vllm/model_executor/layers/quantization/quark/quark.py
@@ -307,18 +307,6 @@ class QuarkConfig(QuantizationConfig):
        # If no matches, return None
        return None

-    def has_fp8_layer_weights(self):
-        layer_quant_config = self.quant_config.get("layer_quant_config")
-        to_dict = lambda obj: cast(Dict[str, Any], obj) or {}
-        return any([
-            'fp8' in cast(
-                str,
-                to_dict(
-                    to_dict(to_dict(layer_quant_config).get(layer_name)).get(
-                        "weight")).get("dtype"))
-            for layer_name in ["*v_proj", "*k_proj", "*q_proj"]
-        ])
-

 class QuarkLinearMethod(LinearMethodBase):