Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b4f9e963
Unverified
Commit
b4f9e963
authored
Aug 29, 2025
by
Jee Jee Li
Committed by
GitHub
Aug 28, 2025
Browse files
[CI/Build] Clean up LoRA test (#23890)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
05d839c1
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
40 additions
and
87 deletions
+40
-87
.buildkite/scripts/hardware_ci/run-amd-test.sh
.buildkite/scripts/hardware_ci/run-amd-test.sh
+0
-1
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+4
-5
tests/entrypoints/llm/test_generate_multiple_loras.py
tests/entrypoints/llm/test_generate_multiple_loras.py
+0
-80
tests/lora/test_llm_with_multi_loras.py
tests/lora/test_llm_with_multi_loras.py
+36
-1
No files found.
.buildkite/scripts/hardware_ci/run-amd-test.sh
View file @
b4f9e963
...
...
@@ -164,7 +164,6 @@ if [[ $commands == *" entrypoints/llm "* ]]; then
--ignore=entrypoints/llm/test_chat.py
\
--ignore=entrypoints/llm/test_accuracy.py
\
--ignore=entrypoints/llm/test_init.py
\
--ignore=entrypoints/llm/test_generate_multiple_loras.py
\
--ignore=entrypoints/llm/test_prompt_validation.py "
}
fi
...
...
.buildkite/test-pipeline.yaml
View file @
b4f9e963
...
...
@@ -109,10 +109,9 @@ steps:
-
tests/entrypoints/offline_mode
commands
:
-
export VLLM_WORKER_MULTIPROC_METHOD=spawn
-
pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py
--ignore=entrypoints/llm/test_generate_multiple_loras.py
--ignore=entrypoints/llm/test_collective_rpc.py
-
pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
-
pytest -v -s entrypoints/llm/test_lazy_outlines.py
# it needs a clean process
-
pytest -v -s entrypoints/llm/test_generate.py
# it needs a clean process
-
pytest -v -s entrypoints/llm/test_generate_multiple_loras.py
# it needs a clean process
-
VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode
# Needs to avoid interference with other tests
-
label
:
Entrypoints Test (API Server)
# 40min
...
...
@@ -326,7 +325,7 @@ steps:
source_file_dependencies
:
-
vllm/lora
-
tests/lora
command
:
pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
command
:
pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
--ignore=lora/test_llm_with_multi_loras.py
parallelism
:
4
-
label
:
PyTorch Compilation Unit Tests
...
...
@@ -807,13 +806,13 @@ steps:
# requires multi-GPU testing for validation.
-
pytest -v -s -x lora/test_chatglm3_tp.py
-
pytest -v -s -x lora/test_llama_tp.py
-
pytest -v -s -x lora/test_multi_loras
_with_tp
.py
-
pytest -v -s -x lora/test_
llm_with_
multi_loras.py
-
label
:
Weight Loading Multiple GPU Test
# 33min
mirror_hardwares
:
[
amdexperimental
]
working_dir
:
"
/vllm-workspace/tests"
num_gpus
:
2
num_gpus
:
2
optional
:
true
source_file_dependencies
:
-
vllm/
...
...
tests/entrypoints/llm/test_generate_multiple_loras.py
deleted
100644 → 0
View file @
05d839c1
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
weakref
import
pytest
# downloading lora to test lora requests
from
huggingface_hub
import
snapshot_download
from
vllm
import
LLM
from
vllm.distributed
import
cleanup_dist_env_and_memory
from
vllm.lora.request
import
LoRARequest
# Base model passed to ``LLM`` and the LoRA adapter repository downloaded
# for it by the tests in this module.
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
LORA_NAME = "typeof/zephyr-7b-beta-lora"

# Prompts sent to ``generate``; one LoRARequest is built per entry.
PROMPTS = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
@pytest.fixture(scope="module")
def monkeypatch_module():
    """Yield a module-scoped ``MonkeyPatch`` and undo its changes afterwards.

    Yields:
        A ``MonkeyPatch`` instance whose modifications are reverted once the
        fixture is torn down.
    """
    # Use the public ``pytest.MonkeyPatch`` instead of importing from the
    # private ``_pytest`` package; ``context()`` calls ``undo()`` on exit,
    # including when teardown is reached through an exception.
    with pytest.MonkeyPatch.context() as mpatch:
        yield mpatch
@pytest.fixture(scope="module", params=[False, True])
def llm(request, monkeypatch_module):
    """Build a LoRA-enabled ``LLM`` once per ``VLLM_USE_V1`` setting.

    The fixture is parametrized over ``False`` and ``True``; the parameter
    decides whether ``VLLM_USE_V1`` is set to ``'0'`` or ``'1'`` before the
    engine is constructed. A weak proxy to the engine is yielded, and
    ``cleanup_dist_env_and_memory`` runs on teardown.
    """
    v1_flag = '1' if request.param else '0'
    monkeypatch_module.setenv('VLLM_USE_V1', v1_flag)

    engine_kwargs = dict(
        model=MODEL_NAME,
        tensor_parallel_size=1,
        max_model_len=8192,
        enable_lora=True,
        max_loras=4,
        max_lora_rank=64,
        max_num_seqs=128,
        enforce_eager=True,
    )
    engine = LLM(**engine_kwargs)

    # pytest caches the yielded fixture value, so hand out only a weak proxy;
    # that way dropping the local reference below allows garbage collection.
    yield weakref.proxy(engine)

    del engine
    cleanup_dist_env_and_memory()
@pytest.fixture(scope="module")
def zephyr_lora_files():
    """Download the ``LORA_NAME`` repository and return the download result."""
    lora_snapshot = snapshot_download(repo_id=LORA_NAME)
    return lora_snapshot
@pytest.mark.skip_global_cleanup
def test_multiple_lora_requests(llm: LLM, zephyr_lora_files):
    """Call ``llm.generate`` with a full list, a short list and one LoRARequest."""
    prompt_count = len(PROMPTS)

    # One request per prompt, named LORA_NAME + index with ids starting at 1.
    per_prompt_loras = []
    for lora_id in range(1, prompt_count + 1):
        adapter_name = f"{LORA_NAME}{lora_id - 1}"
        per_prompt_loras.append(
            LoRARequest(adapter_name, lora_id, zephyr_lora_files))

    # A list of LoRARequests with one entry per prompt
    results = llm.generate(PROMPTS, lora_request=per_prompt_loras)
    assert prompt_count == len(results)

    # ValueError is expected when the list length differs from the prompts
    with pytest.raises(ValueError):
        llm.generate(PROMPTS, lora_request=per_prompt_loras[:1])

    # A single LoRARequest (not wrapped in a list) passed for all prompts
    only_lora = per_prompt_loras[0]
    results = llm.generate(PROMPTS, lora_request=only_lora)
    assert prompt_count == len(results)
tests/lora/test_multi_loras
_with_tp
.py
→
tests/lora/test_
llm_with_
multi_loras.py
View file @
b4f9e963
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Script to test the multi-LoRA service with tp >= 2.
This script contains:
1. a test of the multi-LoRA service with tp >= 2
2. a test of multiple LoRA requests
"""
import
pytest
from
tests.utils
import
multi_gpu_test
from
vllm
import
LLM
,
SamplingParams
from
vllm.lora.request
import
LoRARequest
...
...
@@ -156,3 +160,34 @@ def test_multi_loras_with_tp_sync():
output_text
=
call_llm_get_outputs
(
prompt
,
"Alice"
)
check_outputs
(
output_text
,
expected_output
)
def test_multiple_lora_requests():
    """Check how ``LLM.generate`` accepts ``lora_request`` for several prompts.

    Three cases are exercised against one engine:

    1. a list with one LoRARequest per prompt yields one output per prompt;
    2. a list shorter than the prompt list raises ``ValueError``;
    3. a single LoRARequest (not a list) yields one output per prompt.
    """
    llm = LLM(
        model=MODEL_PATH,
        enable_lora=True,
        max_loras=4,
        max_lora_rank=LORA_RANK,
        max_model_len=512,
        gpu_memory_utilization=0.5,
        enforce_eager=True,
    )
    prompts = ["Hello, my name is"] * 2
    lora_name = "Alice"
    lora_path = LORA_NAME_PATH_MAP[lora_name]

    # The same adapter path is registered under a distinct name and a
    # 1-based id for each prompt.
    lora_requests = [
        LoRARequest(f"{lora_name}{idx}", idx + 1, lora_path)
        for idx, _ in enumerate(prompts)
    ]

    # One LoRARequest per prompt
    outputs = llm.generate(prompts, lora_request=lora_requests)
    assert len(prompts) == len(outputs)

    # Exception raised if the number of LoRARequests does not match the
    # number of prompts; the call's result is irrelevant, so it is not bound.
    with pytest.raises(ValueError):
        llm.generate(prompts, lora_request=lora_requests[:1])

    # A single LoRARequest is accepted for the whole prompt list
    single_lora_request = lora_requests[0]
    outputs = llm.generate(prompts, lora_request=single_lora_request)
    assert len(prompts) == len(outputs)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment