Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4e68ae5e
Unverified
Commit
4e68ae5e
authored
Jun 03, 2025
by
Jee Jee Li
Committed by
GitHub
Jun 03, 2025
Browse files
[CI/Build] Remove V0 LoRA test (#19066)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
4e88723f
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
10 additions
and
97 deletions
+10
-97
tests/lora/test_add_lora.py
tests/lora/test_add_lora.py
+2
-19
tests/lora/test_chatglm3_tp.py
tests/lora/test_chatglm3_tp.py
+0
-10
tests/lora/test_llama_tp.py
tests/lora/test_llama_tp.py
+0
-8
tests/lora/test_lora_functions.py
tests/lora/test_lora_functions.py
+8
-26
tests/lora/test_mixtral.py
tests/lora/test_mixtral.py
+0
-8
tests/lora/test_quant_model.py
tests/lora/test_quant_model.py
+0
-8
tests/lora/test_qwen2vl.py
tests/lora/test_qwen2vl.py
+0
-8
tests/lora/test_worker.py
tests/lora/test_worker.py
+0
-10
No files found.
tests/lora/test_add_lora.py
View file @
4e68ae5e
...
...
@@ -6,6 +6,8 @@ import pytest
import
vllm.envs
as
env
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.entrypoints.openai.api_server
import
(
build_async_engine_client_from_engine_args
)
from
vllm.inputs
import
TextPrompt
from
vllm.lora.request
import
LoRARequest
from
vllm.sampling_params
import
SamplingParams
...
...
@@ -16,14 +18,6 @@ LORA_RANK = 64
DEFAULT_MAX_LORAS
=
4
*
3
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def
get_lora_requests
(
lora_path
)
->
list
[
LoRARequest
]:
lora_requests
:
list
[
LoRARequest
]
=
[
LoRARequest
(
lora_name
=
f
"
{
i
}
"
,
lora_int_id
=
i
,
lora_path
=
lora_path
)
...
...
@@ -88,17 +82,6 @@ async def test_add_lora(chatglm3_lora_files):
trust_remote_code
=
True
,
enforce_eager
=
True
)
# The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
# environment variable. Reload vllm.engine.async_llm_engine as
# vllm.engine.async_llm_engine.AsyncLLMEngine changes depending on the
# env var.
import
importlib
import
vllm.engine.async_llm_engine
importlib
.
reload
(
vllm
.
engine
.
async_llm_engine
)
from
vllm.entrypoints.openai.api_server
import
(
build_async_engine_client_from_engine_args
)
# split lora_requests into 3 parts
part_size
=
len
(
lora_requests
)
//
3
dummy_run_requests
=
lora_requests
[:
part_size
]
...
...
tests/lora/test_chatglm3_tp.py
View file @
4e68ae5e
# SPDX-License-Identifier: Apache-2.0
import
pytest
import
vllm
from
vllm.lora.request
import
LoRARequest
...
...
@@ -18,14 +16,6 @@ EXPECTED_LORA_OUTPUT = [
]
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_id
:
int
)
->
list
[
str
]:
prompts
=
[
PROMPT_TEMPLATE
.
format
(
query
=
"How many singers do we have?"
),
...
...
tests/lora/test_llama_tp.py
View file @
4e68ae5e
...
...
@@ -33,14 +33,6 @@ EXPECTED_LORA_OUTPUT = [
]
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_id
:
int
,
...
...
tests/lora/test_lora_functions.py
View file @
4e68ae5e
...
...
@@ -2,26 +2,24 @@
"""
Script to test add_lora, remove_lora, pin_lora, list_loras functions.
"""
import
os
import
pytest
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
EngineArgs
from
vllm.engine.llm_engine
import
LLMEngine
from
vllm.entrypoints.openai.api_server
import
(
build_async_engine_client_from_engine_args
)
from
vllm.lora.request
import
LoRARequest
MODEL_PATH
=
"meta-llama/Llama-2-7b-hf"
LORA_MODULE_PATH
=
"yard1/llama-2-7b-sql-lora-test"
LORA_RANK
=
8
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
# @pytest.fixture(autouse=True)
# def v1(run_with_both_engines_lora):
# # Simple autouse wrapper to run both engines for each test
# # This can be promoted up to conftest.py to run for every
# # test in a package
# pass
def
make_lora_request
(
lora_id
:
int
):
...
...
@@ -79,22 +77,6 @@ def test_lora_functions_sync():
@
pytest
.
mark
.
asyncio
async
def
test_lora_functions_async
():
if
os
.
getenv
(
"VLLM_USE_V1"
)
==
"0"
:
pytest
.
skip
(
reason
=
"V0 AsyncLLMEngine does not expose remove/list/pin LoRA functions"
)
# The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
# environment variable. Reload vllm.engine.async_llm_engine as
# vllm.engine.async_llm_engine.AsyncLLMEngine changes depending on the
# env var.
import
importlib
import
vllm.engine.async_llm_engine
importlib
.
reload
(
vllm
.
engine
.
async_llm_engine
)
from
vllm.entrypoints.openai.api_server
import
(
build_async_engine_client_from_engine_args
)
max_loras
=
4
engine_args
=
AsyncEngineArgs
(
model
=
MODEL_PATH
,
enable_lora
=
True
,
...
...
tests/lora/test_mixtral.py
View file @
4e68ae5e
...
...
@@ -10,14 +10,6 @@ from vllm.platforms import current_platform
MODEL_PATH
=
"mistralai/Mixtral-8x7B-Instruct-v0.1"
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_id
:
int
,
prompts
:
list
[
str
])
->
list
[
str
]:
...
...
tests/lora/test_quant_model.py
View file @
4e68ae5e
...
...
@@ -37,14 +37,6 @@ else:
]
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_id
:
int
,
...
...
tests/lora/test_qwen2vl.py
View file @
4e68ae5e
...
...
@@ -13,14 +13,6 @@ from vllm.platforms import current_platform
from
vllm.sampling_params
import
BeamSearchParams
@
pytest
.
fixture
(
autouse
=
not
current_platform
.
is_cpu
())
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
@
dataclass
class
TestConfig
:
model_path
:
str
...
...
tests/lora/test_worker.py
View file @
4e68ae5e
...
...
@@ -6,8 +6,6 @@ import tempfile
from
typing
import
Union
from
unittest.mock
import
patch
import
pytest
import
vllm.envs
as
envs
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoadConfig
,
LoRAConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
,
...
...
@@ -18,14 +16,6 @@ from vllm.v1.worker.gpu_worker import Worker as V1Worker
from
vllm.worker.worker
import
Worker
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
@
patch
.
dict
(
os
.
environ
,
{
"RANK"
:
"0"
})
def
test_worker_apply_lora
(
sql_lora_files
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment