Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4e68ae5e
Unverified
Commit
4e68ae5e
authored
Jun 03, 2025
by
Jee Jee Li
Committed by
GitHub
Jun 03, 2025
Browse files
[CI/Build] Remove V0 LoRA test (#19066)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
4e88723f
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
10 additions
and
97 deletions
+10
-97
tests/lora/test_add_lora.py
tests/lora/test_add_lora.py
+2
-19
tests/lora/test_chatglm3_tp.py
tests/lora/test_chatglm3_tp.py
+0
-10
tests/lora/test_llama_tp.py
tests/lora/test_llama_tp.py
+0
-8
tests/lora/test_lora_functions.py
tests/lora/test_lora_functions.py
+8
-26
tests/lora/test_mixtral.py
tests/lora/test_mixtral.py
+0
-8
tests/lora/test_quant_model.py
tests/lora/test_quant_model.py
+0
-8
tests/lora/test_qwen2vl.py
tests/lora/test_qwen2vl.py
+0
-8
tests/lora/test_worker.py
tests/lora/test_worker.py
+0
-10
No files found.
tests/lora/test_add_lora.py
View file @
4e68ae5e
...
@@ -6,6 +6,8 @@ import pytest
...
@@ -6,6 +6,8 @@ import pytest
import
vllm.envs
as
env
import
vllm.envs
as
env
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.entrypoints.openai.api_server
import
(
build_async_engine_client_from_engine_args
)
from
vllm.inputs
import
TextPrompt
from
vllm.inputs
import
TextPrompt
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
...
@@ -16,14 +18,6 @@ LORA_RANK = 64
...
@@ -16,14 +18,6 @@ LORA_RANK = 64
DEFAULT_MAX_LORAS
=
4
*
3
DEFAULT_MAX_LORAS
=
4
*
3
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def
get_lora_requests
(
lora_path
)
->
list
[
LoRARequest
]:
def
get_lora_requests
(
lora_path
)
->
list
[
LoRARequest
]:
lora_requests
:
list
[
LoRARequest
]
=
[
lora_requests
:
list
[
LoRARequest
]
=
[
LoRARequest
(
lora_name
=
f
"
{
i
}
"
,
lora_int_id
=
i
,
lora_path
=
lora_path
)
LoRARequest
(
lora_name
=
f
"
{
i
}
"
,
lora_int_id
=
i
,
lora_path
=
lora_path
)
...
@@ -88,17 +82,6 @@ async def test_add_lora(chatglm3_lora_files):
...
@@ -88,17 +82,6 @@ async def test_add_lora(chatglm3_lora_files):
trust_remote_code
=
True
,
trust_remote_code
=
True
,
enforce_eager
=
True
)
enforce_eager
=
True
)
# The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
# environment variable. Reload vllm.engine.async_llm_engine as
# vllm.engine.async_llm_engine.AsyncLLMEngine changes depending on the
# env var.
import
importlib
import
vllm.engine.async_llm_engine
importlib
.
reload
(
vllm
.
engine
.
async_llm_engine
)
from
vllm.entrypoints.openai.api_server
import
(
build_async_engine_client_from_engine_args
)
# split lora_requests into 3 parts
# split lora_requests into 3 parts
part_size
=
len
(
lora_requests
)
//
3
part_size
=
len
(
lora_requests
)
//
3
dummy_run_requests
=
lora_requests
[:
part_size
]
dummy_run_requests
=
lora_requests
[:
part_size
]
...
...
tests/lora/test_chatglm3_tp.py
View file @
4e68ae5e
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
pytest
import
vllm
import
vllm
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
...
@@ -18,14 +16,6 @@ EXPECTED_LORA_OUTPUT = [
...
@@ -18,14 +16,6 @@ EXPECTED_LORA_OUTPUT = [
]
]
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_id
:
int
)
->
list
[
str
]:
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_id
:
int
)
->
list
[
str
]:
prompts
=
[
prompts
=
[
PROMPT_TEMPLATE
.
format
(
query
=
"How many singers do we have?"
),
PROMPT_TEMPLATE
.
format
(
query
=
"How many singers do we have?"
),
...
...
tests/lora/test_llama_tp.py
View file @
4e68ae5e
...
@@ -33,14 +33,6 @@ EXPECTED_LORA_OUTPUT = [
...
@@ -33,14 +33,6 @@ EXPECTED_LORA_OUTPUT = [
]
]
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def
do_sample
(
llm
:
vllm
.
LLM
,
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_path
:
str
,
lora_id
:
int
,
lora_id
:
int
,
...
...
tests/lora/test_lora_functions.py
View file @
4e68ae5e
...
@@ -2,26 +2,24 @@
...
@@ -2,26 +2,24 @@
"""
"""
Script to test add_lora, remove_lora, pin_lora, list_loras functions.
Script to test add_lora, remove_lora, pin_lora, list_loras functions.
"""
"""
import
os
import
pytest
import
pytest
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
EngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
EngineArgs
from
vllm.engine.llm_engine
import
LLMEngine
from
vllm.engine.llm_engine
import
LLMEngine
from
vllm.entrypoints.openai.api_server
import
(
build_async_engine_client_from_engine_args
)
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
MODEL_PATH
=
"meta-llama/Llama-2-7b-hf"
MODEL_PATH
=
"meta-llama/Llama-2-7b-hf"
LORA_MODULE_PATH
=
"yard1/llama-2-7b-sql-lora-test"
LORA_MODULE_PATH
=
"yard1/llama-2-7b-sql-lora-test"
LORA_RANK
=
8
LORA_RANK
=
8
# @pytest.fixture(autouse=True)
@
pytest
.
fixture
(
autouse
=
True
)
# def v1(run_with_both_engines_lora):
def
v1
(
run_with_both_engines_lora
):
# # Simple autouse wrapper to run both engines for each test
# Simple autouse wrapper to run both engines for each test
# # This can be promoted up to conftest.py to run for every
# This can be promoted up to conftest.py to run for every
# # test in a package
# test in a package
# pass
pass
def
make_lora_request
(
lora_id
:
int
):
def
make_lora_request
(
lora_id
:
int
):
...
@@ -79,22 +77,6 @@ def test_lora_functions_sync():
...
@@ -79,22 +77,6 @@ def test_lora_functions_sync():
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_lora_functions_async
():
async
def
test_lora_functions_async
():
if
os
.
getenv
(
"VLLM_USE_V1"
)
==
"0"
:
pytest
.
skip
(
reason
=
"V0 AsyncLLMEngine does not expose remove/list/pin LoRA functions"
)
# The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
# environment variable. Reload vllm.engine.async_llm_engine as
# vllm.engine.async_llm_engine.AsyncLLMEngine changes depending on the
# env var.
import
importlib
import
vllm.engine.async_llm_engine
importlib
.
reload
(
vllm
.
engine
.
async_llm_engine
)
from
vllm.entrypoints.openai.api_server
import
(
build_async_engine_client_from_engine_args
)
max_loras
=
4
max_loras
=
4
engine_args
=
AsyncEngineArgs
(
model
=
MODEL_PATH
,
engine_args
=
AsyncEngineArgs
(
model
=
MODEL_PATH
,
enable_lora
=
True
,
enable_lora
=
True
,
...
...
tests/lora/test_mixtral.py
View file @
4e68ae5e
...
@@ -10,14 +10,6 @@ from vllm.platforms import current_platform
...
@@ -10,14 +10,6 @@ from vllm.platforms import current_platform
MODEL_PATH
=
"mistralai/Mixtral-8x7B-Instruct-v0.1"
MODEL_PATH
=
"mistralai/Mixtral-8x7B-Instruct-v0.1"
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_id
:
int
,
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_id
:
int
,
prompts
:
list
[
str
])
->
list
[
str
]:
prompts
:
list
[
str
])
->
list
[
str
]:
...
...
tests/lora/test_quant_model.py
View file @
4e68ae5e
...
@@ -37,14 +37,6 @@ else:
...
@@ -37,14 +37,6 @@ else:
]
]
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def
do_sample
(
llm
:
vllm
.
LLM
,
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_path
:
str
,
lora_id
:
int
,
lora_id
:
int
,
...
...
tests/lora/test_qwen2vl.py
View file @
4e68ae5e
...
@@ -13,14 +13,6 @@ from vllm.platforms import current_platform
...
@@ -13,14 +13,6 @@ from vllm.platforms import current_platform
from
vllm.sampling_params
import
BeamSearchParams
from
vllm.sampling_params
import
BeamSearchParams
@
pytest
.
fixture
(
autouse
=
not
current_platform
.
is_cpu
())
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
@
dataclass
@
dataclass
class
TestConfig
:
class
TestConfig
:
model_path
:
str
model_path
:
str
...
...
tests/lora/test_worker.py
View file @
4e68ae5e
...
@@ -6,8 +6,6 @@ import tempfile
...
@@ -6,8 +6,6 @@ import tempfile
from
typing
import
Union
from
typing
import
Union
from
unittest.mock
import
patch
from
unittest.mock
import
patch
import
pytest
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoadConfig
,
LoRAConfig
,
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoadConfig
,
LoRAConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
,
...
@@ -18,14 +16,6 @@ from vllm.v1.worker.gpu_worker import Worker as V1Worker
...
@@ -18,14 +16,6 @@ from vllm.v1.worker.gpu_worker import Worker as V1Worker
from
vllm.worker.worker
import
Worker
from
vllm.worker.worker
import
Worker
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
@
patch
.
dict
(
os
.
environ
,
{
"RANK"
:
"0"
})
@
patch
.
dict
(
os
.
environ
,
{
"RANK"
:
"0"
})
def
test_worker_apply_lora
(
sql_lora_files
):
def
test_worker_apply_lora
(
sql_lora_files
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment