[Bugfix] [CI] Fix Tensorizer LoRA test (#20760)

Signed-off-by: Sanger Steel <sangersteel@gmail.com>

[Bugfix] [CI] Fix Tensorizer LoRA test (#20760)
Signed-off-by: Sanger Steel <sangersteel@gmail.com>
5e53c89a · Sanger Steel · GitHub · c66e38ea · 5e53c89a · 5e53c89a
Unverified Commit 5e53c89a authored Jul 10, 2025 by Sanger Steel Committed by GitHub Jul 10, 2025
3 changed files
--- a/tests/lora/test_llama_tp.py
+++ b/tests/lora/test_llama_tp.py
@@ -4,8 +4,6 @@ import subprocess
 import sys
 from typing import Union

-import pytest
-
 import vllm
 from vllm import LLM
 from vllm.lora.request import LoRARequest
@@ -151,8 +149,6 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
    generate_and_test(llm, sql_lora_files)


-@pytest.mark.skip(reason=("Skipping this test as tensorizer is not "
-                          "working with LoRA as of #19619"))
 @multi_gpu_test(num_gpus=2)
 @create_new_process_for_each_test()
 def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
@@ -189,7 +185,6 @@ def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,

    model_uri = tmp_path / "vllm" / model_ref / suffix / model_name
    tensorizer_config = TensorizerConfig(tensorizer_uri=str(model_uri))
-    tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir

    loaded_vllm_model = LLM(model=model_ref,
                            load_format="tensorizer",
@@ -200,16 +195,16 @@ def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
                            tensor_parallel_size=2,
                            max_loras=2)

-    tensorizer_config_dict = tensorizer_config.to_serializable()
+    tc_as_dict = tensorizer_config.to_serializable()

    print("lora adapter created")
    assert do_sample(loaded_vllm_model,
                     sql_lora_files,
-                     tensorizer_config_dict=tensorizer_config_dict,
+                     tensorizer_config_dict=tc_as_dict,
                     lora_id=0) == EXPECTED_NO_LORA_OUTPUT

    print("lora 1")
    assert do_sample(loaded_vllm_model,
                     sql_lora_files,
-                     tensorizer_config_dict=tensorizer_config_dict,
+                     tensorizer_config_dict=tc_as_dict,
                     lora_id=1) == EXPECTED_LORA_OUTPUT
--- a/vllm/lora/peft_helper.py
+++ b/vllm/lora/peft_helper.py
@@ -102,7 +102,7 @@ class PEFTHelper:
            tensorizer_config = TensorizerConfig(**tensorizer_config_dict)
            tensorizer_args = tensorizer_config._construct_tensorizer_args()
            from tensorizer.stream_io import open_stream
-            lora_config_path = os.path.join(tensorizer_config.lora_dir,
+            lora_config_path = os.path.join(tensorizer_config.tensorizer_dir,
                                            "adapter_config.json")
            with open_stream(lora_config_path,
                             mode="rb",
@@ -110,7 +110,7 @@ class PEFTHelper:
                config = json.load(f)

            logger.info("Successfully deserialized LoRA config from %s",
-                        tensorizer_config.lora_dir)
+                        tensorizer_config.tensorizer_dir)

        else:
            with open(lora_config_path) as f:

--- a/vllm/model_executor/model_loader/tensorizer.py
+++ b/vllm/model_executor/model_loader/tensorizer.py
@@ -222,17 +222,17 @@ class TensorizerConfig(MutableMapping):
        self._is_sharded = isinstance(self.tensorizer_uri, str) \
            and re.search(r'%0\dd', self.tensorizer_uri) is not None

+        if self.tensorizer_dir and self.lora_dir:
+            raise ValueError(
+                "Only one of tensorizer_dir or lora_dir may be specified. "
+                "Use lora_dir exclusively when serializing LoRA adapters, "
+                "and tensorizer_dir or tensorizer_uri otherwise.")
        if self.tensorizer_dir and self.tensorizer_uri:
            logger.warning_once(
                "Provided both tensorizer_dir and tensorizer_uri. "
                "Inferring tensorizer_dir from tensorizer_uri as the "
                "latter takes precedence.")
            self.tensorizer_dir = os.path.dirname(self.tensorizer_uri)
-        if self.tensorizer_dir and self.lora_dir:
-            raise ValueError(
-                "Only one of tensorizer_dir or lora_dir may be specified. "
-                "Use lora_dir exclusively when serializing LoRA adapters, "
-                "and tensorizer_dir or tensorizer_uri otherwise.")
        if not self.tensorizer_uri:
            if self.lora_dir:
                self.tensorizer_uri = f"{self.lora_dir}/adapter_model.tensors"
@@ -695,7 +695,7 @@ def tensorize_lora_adapter(lora_path: str,
    needed to load a LoRA adapter are a safetensors-format file called
    adapter_model.safetensors and a json config file called adapter_config.json.

-    Serializes the files in the tensorizer_config.lora_dir
+    Serializes the files in the tensorizer_config.tensorizer_dir
    """
    import safetensors

@@ -725,13 +725,13 @@ def tensorize_lora_adapter(lora_path: str,

    tensorizer_args = tensorizer_config._construct_tensorizer_args()

-    with open_stream(f"{tensorizer_config.lora_dir}/adapter_config.json",
+    with open_stream(f"{tensorizer_config.tensorizer_dir}/adapter_config.json",
                     mode="wb+",
                     **tensorizer_args.stream_kwargs) as f:

        f.write(json.dumps(config).encode("utf-8"))

-    lora_uri = (f"{tensorizer_config.lora_dir}"
+    lora_uri = (f"{tensorizer_config.tensorizer_dir}"
                f"/adapter_model.tensors")
    with open_stream(lora_uri, mode="wb+",
                     **tensorizer_args.stream_kwargs) as f:
@@ -740,4 +740,4 @@ def tensorize_lora_adapter(lora_path: str,
        serializer.close()

    logger.info("Successfully serialized LoRA files to %s",
-                str(tensorizer_config.lora_dir))
+                str(tensorizer_config.tensorizer_dir))