[Bugfix] Fix boolean conversion for OpenVINO env variable (#13615)

382f66fb · Helena Kloosterman · GitHub · 8354f664 · 382f66fb · 382f66fb
Unverified commit 382f66fb, authored Feb 22, 2025 by Helena Kloosterman; committed by GitHub on Feb 22, 2025
Show whitespace changes
Inline Side-by-side

Showing with 5 additions and 3 deletions

vllm/envs.py +3 -2

vllm/model_executor/model_loader/openvino.py +2 -1

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -360,8 +360,9 @@ environment_variables: Dict[str, Callable[[], Any]] = {
    # Enables weights compression during model export via HF Optimum
    # default is False
    "VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS":
-    lambda: bool(os.getenv("VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS", False)),
-
+    lambda:
+    (os.environ.get("VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS", "0").lower() in
+     ("on", "true", "1")),
    # If the env var is set, then all workers will execute as separate
    # processes from the engine, and we use the same mechanism to trigger
    # execution on all workers.

--- a/vllm/model_executor/model_loader/openvino.py
+++ b/vllm/model_executor/model_loader/openvino.py
@@ -125,7 +125,8 @@ class OpenVINOCausalLM(nn.Module):
                "as-is, all possible options that may affect model conversion "
                "are ignored.")

-        load_in_8bit = envs.VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS
+        load_in_8bit = (envs.VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS
+                        if export else False)
        pt_model = OVModelForCausalLM.from_pretrained(
            model_config.model,
            export=export,