Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d84b97a3
Unverified
Commit
d84b97a3
authored
Aug 01, 2025
by
XiongfeiWei
Committed by
GitHub
Aug 01, 2025
Browse files
Add lora test for tp>1 case for TPU. (#21970)
Signed-off-by:
Xiongfei Wei
<
isaacwxf23@gmail.com
>
parent
d3317594
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
7 deletions
+16
-7
tests/tpu/lora/test_lora.py
tests/tpu/lora/test_lora.py
+16
-7
No files found.
tests/tpu/lora/test_lora.py
View file @
d84b97a3
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
import
pytest
from
torch_xla._internal
import
tpu
import
vllm
import
vllm
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
...
@@ -27,25 +28,31 @@ def use_v1_only(monkeypatch: pytest.MonkeyPatch):
...
@@ -27,25 +28,31 @@ def use_v1_only(monkeypatch: pytest.MonkeyPatch):
yield
yield
def
setup_vllm
(
num_loras
:
int
)
->
vllm
.
LLM
:
def
setup_vllm
(
num_loras
:
int
,
tp
:
int
)
->
vllm
.
LLM
:
return
vllm
.
LLM
(
model
=
"Qwen/Qwen2.5-3B-Instruct"
,
return
vllm
.
LLM
(
model
=
"Qwen/Qwen2.5-3B-Instruct"
,
num_scheduler_steps
=
1
,
num_scheduler_steps
=
1
,
max_model_len
=
256
,
max_model_len
=
256
,
max_seq_len_to_capture
=
256
,
max_seq_len_to_capture
=
256
,
max_num_seqs
=
8
,
max_num_seqs
=
8
,
tensor_parallel_size
=
tp
,
enable_lora
=
True
,
enable_lora
=
True
,
max_loras
=
num_loras
,
max_loras
=
num_loras
,
max_lora_rank
=
8
)
max_lora_rank
=
8
)
def
test_single_lora
():
TPU_TENSOR_PARALLEL_SIZES
=
[
1
,
tpu
.
num_available_chips
()
]
if
tpu
.
num_available_chips
()
>
1
else
[
1
]
@
pytest
.
mark
.
parametrize
(
"tp"
,
TPU_TENSOR_PARALLEL_SIZES
)
def
test_single_lora
(
tp
:
int
):
"""
"""
This test ensures we can run a single LoRA adapter on the TPU backend.
This test ensures we can run a single LoRA adapter on the TPU backend.
We run "Username6568/Qwen2.5-3B-Instruct-1_plus_1_equals_1_adapter" which
We run "Username6568/Qwen2.5-3B-Instruct-1_plus_1_equals_1_adapter" which
will force Qwen2.5-3B-Instruct to claim 1+1=1.
will force Qwen2.5-3B-Instruct to claim 1+1=1.
"""
"""
llm
=
setup_vllm
(
1
)
llm
=
setup_vllm
(
1
,
tp
)
prompt
=
"What is 1+1?
\n
"
prompt
=
"What is 1+1?
\n
"
...
@@ -63,7 +70,8 @@ def test_single_lora():
...
@@ -63,7 +70,8 @@ def test_single_lora():
assert
int
(
answer
)
==
1
assert
int
(
answer
)
==
1
def
test_lora_hotswapping
():
@
pytest
.
mark
.
parametrize
(
"tp"
,
TPU_TENSOR_PARALLEL_SIZES
)
def
test_lora_hotswapping
(
tp
:
int
):
"""
"""
This test ensures we can run multiple LoRA adapters on the TPU backend, even
This test ensures we can run multiple LoRA adapters on the TPU backend, even
if we only have space to store 1.
if we only have space to store 1.
...
@@ -79,7 +87,7 @@ def test_lora_hotswapping():
...
@@ -79,7 +87,7 @@ def test_lora_hotswapping():
for
i
in
range
(
1
,
5
)
for
i
in
range
(
1
,
5
)
]
]
llm
=
setup_vllm
(
1
)
llm
=
setup_vllm
(
1
,
tp
)
prompt
=
"What is 1+1?
\n
"
prompt
=
"What is 1+1?
\n
"
...
@@ -94,7 +102,8 @@ def test_lora_hotswapping():
...
@@ -94,7 +102,8 @@ def test_lora_hotswapping():
assert
int
(
answer
)
==
i
+
1
assert
int
(
answer
)
==
i
+
1
def
test_multi_lora
():
@
pytest
.
mark
.
parametrize
(
"tp"
,
TPU_TENSOR_PARALLEL_SIZES
)
def
test_multi_lora
(
tp
:
int
):
"""
"""
This test ensures we can run multiple LoRA adapters on the TPU backend, when
This test ensures we can run multiple LoRA adapters on the TPU backend, when
we have enough space to store all of them.
we have enough space to store all of them.
...
@@ -109,7 +118,7 @@ def test_multi_lora():
...
@@ -109,7 +118,7 @@ def test_multi_lora():
for
i
in
range
(
1
,
5
)
for
i
in
range
(
1
,
5
)
]
]
llm
=
setup_vllm
(
4
)
llm
=
setup_vllm
(
4
,
tp
)
prompt
=
"What is 1+1?
\n
"
prompt
=
"What is 1+1?
\n
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment