Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5e53c89a
Unverified
Commit
5e53c89a
authored
Jul 10, 2025
by
Sanger Steel
Committed by
GitHub
Jul 10, 2025
Browse files
[Bugfix] [CI] Fix Tensorizer LoRA test (#20760)
Signed-off-by:
Sanger Steel
<
sangersteel@gmail.com
>
parent
c66e38ea
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
14 additions
and
19 deletions
+14
-19
tests/lora/test_llama_tp.py
tests/lora/test_llama_tp.py
+3
-8
vllm/lora/peft_helper.py
vllm/lora/peft_helper.py
+2
-2
vllm/model_executor/model_loader/tensorizer.py
vllm/model_executor/model_loader/tensorizer.py
+9
-9
No files found.
tests/lora/test_llama_tp.py
View file @
5e53c89a
...
@@ -4,8 +4,6 @@ import subprocess
...
@@ -4,8 +4,6 @@ import subprocess
import
sys
import
sys
from
typing
import
Union
from
typing
import
Union
import
pytest
import
vllm
import
vllm
from
vllm
import
LLM
from
vllm
import
LLM
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
...
@@ -151,8 +149,6 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
...
@@ -151,8 +149,6 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
generate_and_test
(
llm
,
sql_lora_files
)
generate_and_test
(
llm
,
sql_lora_files
)
@
pytest
.
mark
.
skip
(
reason
=
(
"Skipping this test as tensorizer is not "
"working with LoRA as of #19619"
))
@
multi_gpu_test
(
num_gpus
=
2
)
@
multi_gpu_test
(
num_gpus
=
2
)
@
create_new_process_for_each_test
()
@
create_new_process_for_each_test
()
def
test_tp2_serialize_and_deserialize_lora
(
tmp_path
,
sql_lora_files
,
def
test_tp2_serialize_and_deserialize_lora
(
tmp_path
,
sql_lora_files
,
...
@@ -189,7 +185,6 @@ def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
...
@@ -189,7 +185,6 @@ def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
model_uri
=
tmp_path
/
"vllm"
/
model_ref
/
suffix
/
model_name
model_uri
=
tmp_path
/
"vllm"
/
model_ref
/
suffix
/
model_name
tensorizer_config
=
TensorizerConfig
(
tensorizer_uri
=
str
(
model_uri
))
tensorizer_config
=
TensorizerConfig
(
tensorizer_uri
=
str
(
model_uri
))
tensorizer_config
.
lora_dir
=
tensorizer_config
.
tensorizer_dir
loaded_vllm_model
=
LLM
(
model
=
model_ref
,
loaded_vllm_model
=
LLM
(
model
=
model_ref
,
load_format
=
"tensorizer"
,
load_format
=
"tensorizer"
,
...
@@ -200,16 +195,16 @@ def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
...
@@ -200,16 +195,16 @@ def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
tensor_parallel_size
=
2
,
tensor_parallel_size
=
2
,
max_loras
=
2
)
max_loras
=
2
)
t
ensorizer_config
_dict
=
tensorizer_config
.
to_serializable
()
t
c_as
_dict
=
tensorizer_config
.
to_serializable
()
print
(
"lora adapter created"
)
print
(
"lora adapter created"
)
assert
do_sample
(
loaded_vllm_model
,
assert
do_sample
(
loaded_vllm_model
,
sql_lora_files
,
sql_lora_files
,
tensorizer_config_dict
=
t
ensorizer_config
_dict
,
tensorizer_config_dict
=
t
c_as
_dict
,
lora_id
=
0
)
==
EXPECTED_NO_LORA_OUTPUT
lora_id
=
0
)
==
EXPECTED_NO_LORA_OUTPUT
print
(
"lora 1"
)
print
(
"lora 1"
)
assert
do_sample
(
loaded_vllm_model
,
assert
do_sample
(
loaded_vllm_model
,
sql_lora_files
,
sql_lora_files
,
tensorizer_config_dict
=
t
ensorizer_config
_dict
,
tensorizer_config_dict
=
t
c_as
_dict
,
lora_id
=
1
)
==
EXPECTED_LORA_OUTPUT
lora_id
=
1
)
==
EXPECTED_LORA_OUTPUT
vllm/lora/peft_helper.py
View file @
5e53c89a
...
@@ -102,7 +102,7 @@ class PEFTHelper:
...
@@ -102,7 +102,7 @@ class PEFTHelper:
tensorizer_config
=
TensorizerConfig
(
**
tensorizer_config_dict
)
tensorizer_config
=
TensorizerConfig
(
**
tensorizer_config_dict
)
tensorizer_args
=
tensorizer_config
.
_construct_tensorizer_args
()
tensorizer_args
=
tensorizer_config
.
_construct_tensorizer_args
()
from
tensorizer.stream_io
import
open_stream
from
tensorizer.stream_io
import
open_stream
lora_config_path
=
os
.
path
.
join
(
tensorizer_config
.
lora
_dir
,
lora_config_path
=
os
.
path
.
join
(
tensorizer_config
.
tensorizer
_dir
,
"adapter_config.json"
)
"adapter_config.json"
)
with
open_stream
(
lora_config_path
,
with
open_stream
(
lora_config_path
,
mode
=
"rb"
,
mode
=
"rb"
,
...
@@ -110,7 +110,7 @@ class PEFTHelper:
...
@@ -110,7 +110,7 @@ class PEFTHelper:
config
=
json
.
load
(
f
)
config
=
json
.
load
(
f
)
logger
.
info
(
"Successfully deserialized LoRA config from %s"
,
logger
.
info
(
"Successfully deserialized LoRA config from %s"
,
tensorizer_config
.
lora
_dir
)
tensorizer_config
.
tensorizer
_dir
)
else
:
else
:
with
open
(
lora_config_path
)
as
f
:
with
open
(
lora_config_path
)
as
f
:
...
...
vllm/model_executor/model_loader/tensorizer.py
View file @
5e53c89a
...
@@ -222,17 +222,17 @@ class TensorizerConfig(MutableMapping):
...
@@ -222,17 +222,17 @@ class TensorizerConfig(MutableMapping):
self
.
_is_sharded
=
isinstance
(
self
.
tensorizer_uri
,
str
)
\
self
.
_is_sharded
=
isinstance
(
self
.
tensorizer_uri
,
str
)
\
and
re
.
search
(
r
'%0\dd'
,
self
.
tensorizer_uri
)
is
not
None
and
re
.
search
(
r
'%0\dd'
,
self
.
tensorizer_uri
)
is
not
None
if
self
.
tensorizer_dir
and
self
.
lora_dir
:
raise
ValueError
(
"Only one of tensorizer_dir or lora_dir may be specified. "
"Use lora_dir exclusively when serializing LoRA adapters, "
"and tensorizer_dir or tensorizer_uri otherwise."
)
if
self
.
tensorizer_dir
and
self
.
tensorizer_uri
:
if
self
.
tensorizer_dir
and
self
.
tensorizer_uri
:
logger
.
warning_once
(
logger
.
warning_once
(
"Provided both tensorizer_dir and tensorizer_uri. "
"Provided both tensorizer_dir and tensorizer_uri. "
"Inferring tensorizer_dir from tensorizer_uri as the "
"Inferring tensorizer_dir from tensorizer_uri as the "
"latter takes precedence."
)
"latter takes precedence."
)
self
.
tensorizer_dir
=
os
.
path
.
dirname
(
self
.
tensorizer_uri
)
self
.
tensorizer_dir
=
os
.
path
.
dirname
(
self
.
tensorizer_uri
)
if
self
.
tensorizer_dir
and
self
.
lora_dir
:
raise
ValueError
(
"Only one of tensorizer_dir or lora_dir may be specified. "
"Use lora_dir exclusively when serializing LoRA adapters, "
"and tensorizer_dir or tensorizer_uri otherwise."
)
if
not
self
.
tensorizer_uri
:
if
not
self
.
tensorizer_uri
:
if
self
.
lora_dir
:
if
self
.
lora_dir
:
self
.
tensorizer_uri
=
f
"
{
self
.
lora_dir
}
/adapter_model.tensors"
self
.
tensorizer_uri
=
f
"
{
self
.
lora_dir
}
/adapter_model.tensors"
...
@@ -695,7 +695,7 @@ def tensorize_lora_adapter(lora_path: str,
...
@@ -695,7 +695,7 @@ def tensorize_lora_adapter(lora_path: str,
needed to load a LoRA adapter are a safetensors-format file called
needed to load a LoRA adapter are a safetensors-format file called
adapter_model.safetensors and a json config file called adapter_config.json.
adapter_model.safetensors and a json config file called adapter_config.json.
Serializes the files in the tensorizer_config.
lora
_dir
Serializes the files in the tensorizer_config.
tensorizer
_dir
"""
"""
import
safetensors
import
safetensors
...
@@ -725,13 +725,13 @@ def tensorize_lora_adapter(lora_path: str,
...
@@ -725,13 +725,13 @@ def tensorize_lora_adapter(lora_path: str,
tensorizer_args
=
tensorizer_config
.
_construct_tensorizer_args
()
tensorizer_args
=
tensorizer_config
.
_construct_tensorizer_args
()
with
open_stream
(
f
"
{
tensorizer_config
.
lora
_dir
}
/adapter_config.json"
,
with
open_stream
(
f
"
{
tensorizer_config
.
tensorizer
_dir
}
/adapter_config.json"
,
mode
=
"wb+"
,
mode
=
"wb+"
,
**
tensorizer_args
.
stream_kwargs
)
as
f
:
**
tensorizer_args
.
stream_kwargs
)
as
f
:
f
.
write
(
json
.
dumps
(
config
).
encode
(
"utf-8"
))
f
.
write
(
json
.
dumps
(
config
).
encode
(
"utf-8"
))
lora_uri
=
(
f
"
{
tensorizer_config
.
lora
_dir
}
"
lora_uri
=
(
f
"
{
tensorizer_config
.
tensorizer
_dir
}
"
f
"/adapter_model.tensors"
)
f
"/adapter_model.tensors"
)
with
open_stream
(
lora_uri
,
mode
=
"wb+"
,
with
open_stream
(
lora_uri
,
mode
=
"wb+"
,
**
tensorizer_args
.
stream_kwargs
)
as
f
:
**
tensorizer_args
.
stream_kwargs
)
as
f
:
...
@@ -740,4 +740,4 @@ def tensorize_lora_adapter(lora_path: str,
...
@@ -740,4 +740,4 @@ def tensorize_lora_adapter(lora_path: str,
serializer
.
close
()
serializer
.
close
()
logger
.
info
(
"Successfully serialized LoRA files to %s"
,
logger
.
info
(
"Successfully serialized LoRA files to %s"
,
str
(
tensorizer_config
.
lora
_dir
))
str
(
tensorizer_config
.
tensorizer
_dir
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment