Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4203926f
Unverified
Commit
4203926f
authored
Apr 02, 2025
by
Jee Jee Li
Committed by
GitHub
Apr 02, 2025
Browse files
[CI/Build] Further clean up LoRA tests (#15920)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
cdb57015
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
9 additions
and
46 deletions
+9
-46
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+1
-3
tests/lora/conftest.py
tests/lora/conftest.py
+0
-23
tests/lora/test_layers.py
tests/lora/test_layers.py
+1
-1
tests/lora/test_llama_tp.py
tests/lora/test_llama_tp.py
+0
-17
tests/lora/test_minicpmv_tp.py
tests/lora/test_minicpmv_tp.py
+0
-1
tests/lora/test_transfomers_model.py
tests/lora/test_transfomers_model.py
+7
-1
No files found.
.buildkite/test-pipeline.yaml
View file @
4203926f
...
@@ -289,7 +289,7 @@ steps:
...
@@ -289,7 +289,7 @@ steps:
source_file_dependencies
:
source_file_dependencies
:
-
vllm/lora
-
vllm/lora
-
tests/lora
-
tests/lora
command
:
pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
--ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py
command
:
pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
parallelism
:
4
parallelism
:
4
-
label
:
PyTorch Fullgraph Smoke Test
# 9min
-
label
:
PyTorch Fullgraph Smoke Test
# 9min
...
@@ -602,8 +602,6 @@ steps:
...
@@ -602,8 +602,6 @@ steps:
# requires multi-GPU testing for validation.
# requires multi-GPU testing for validation.
-
pytest -v -s -x lora/test_chatglm3_tp.py
-
pytest -v -s -x lora/test_chatglm3_tp.py
-
pytest -v -s -x lora/test_llama_tp.py
-
pytest -v -s -x lora/test_llama_tp.py
-
pytest -v -s -x lora/test_minicpmv_tp.py
-
pytest -v -s -x lora/test_transfomers_model.py
-
label
:
Weight Loading Multiple GPU Test
# 33min
-
label
:
Weight Loading Multiple GPU Test
# 33min
...
...
tests/lora/conftest.py
View file @
4203926f
...
@@ -2,7 +2,6 @@
...
@@ -2,7 +2,6 @@
import
tempfile
import
tempfile
from
collections
import
OrderedDict
from
collections
import
OrderedDict
from
typing
import
TypedDict
from
unittest.mock
import
MagicMock
,
patch
from
unittest.mock
import
MagicMock
,
patch
import
pytest
import
pytest
...
@@ -26,28 +25,6 @@ from vllm.model_executor.models.interfaces import SupportsLoRA
...
@@ -26,28 +25,6 @@ from vllm.model_executor.models.interfaces import SupportsLoRA
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
class
ContextIDInfo
(
TypedDict
):
lora_id
:
int
context_length
:
str
class
ContextInfo
(
TypedDict
):
lora
:
str
context_length
:
str
LONG_LORA_INFOS
:
list
[
ContextIDInfo
]
=
[{
"lora_id"
:
1
,
"context_length"
:
"16k"
,
},
{
"lora_id"
:
2
,
"context_length"
:
"16k"
,
},
{
"lora_id"
:
3
,
"context_length"
:
"32k"
,
}]
@
pytest
.
fixture
()
@
pytest
.
fixture
()
def
should_do_global_cleanup_after_test
(
request
)
->
bool
:
def
should_do_global_cleanup_after_test
(
request
)
->
bool
:
"""Allow subdirectories to skip global cleanup by overriding this fixture.
"""Allow subdirectories to skip global cleanup by overriding this fixture.
...
...
tests/lora/test_layers.py
View file @
4203926f
...
@@ -59,7 +59,7 @@ DEVICES = ([
...
@@ -59,7 +59,7 @@ DEVICES = ([
# prefill stage(True) or decode stage(False)
# prefill stage(True) or decode stage(False)
STAGES
=
[
True
,
False
]
STAGES
=
[
True
,
False
]
NUM_RANDOM_SEEDS
=
10
NUM_RANDOM_SEEDS
=
6
VOCAB_PARALLEL_EMBEDDING_TEST_NUM_RANDOM_SEEDS
=
128
VOCAB_PARALLEL_EMBEDDING_TEST_NUM_RANDOM_SEEDS
=
128
...
...
tests/lora/test_llama_tp.py
View file @
4203926f
...
@@ -153,20 +153,3 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
...
@@ -153,20 +153,3 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
enable_chunked_prefill
=
True
,
enable_chunked_prefill
=
True
,
)
)
generate_and_test
(
llm
,
sql_lora_files
)
generate_and_test
(
llm
,
sql_lora_files
)
@
multi_gpu_test
(
num_gpus
=
4
)
@
create_new_process_for_each_test
()
def
test_llama_lora_tp4_fully_sharded_enable_bias
(
sql_lora_files
):
llm
=
vllm
.
LLM
(
MODEL_PATH
,
enable_lora
=
True
,
max_num_seqs
=
16
,
max_loras
=
4
,
tensor_parallel_size
=
4
,
fully_sharded_loras
=
True
,
enable_lora_bias
=
True
,
enable_chunked_prefill
=
True
,
)
generate_and_test
(
llm
,
sql_lora_files
)
tests/lora/test_minicpmv_tp.py
View file @
4203926f
...
@@ -58,7 +58,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
...
@@ -58,7 +58,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
@
pytest
.
mark
.
xfail
(
@
pytest
.
mark
.
xfail
(
current_platform
.
is_rocm
(),
current_platform
.
is_rocm
(),
reason
=
"MiniCPM-V dependency xformers incompatible with ROCm"
)
reason
=
"MiniCPM-V dependency xformers incompatible with ROCm"
)
@
create_new_process_for_each_test
()
def
test_minicpmv_lora
(
minicpmv_lora_files
):
def
test_minicpmv_lora
(
minicpmv_lora_files
):
llm
=
vllm
.
LLM
(
llm
=
vllm
.
LLM
(
MODEL_PATH
,
MODEL_PATH
,
...
...
tests/lora/test_transfomers_model.py
View file @
4203926f
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
pytest
import
vllm
import
vllm
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.platforms
import
current_platform
from
..utils
import
create_new_process_for_each_test
,
multi_gpu_test
from
..utils
import
create_new_process_for_each_test
,
multi_gpu_test
...
@@ -44,7 +47,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
...
@@ -44,7 +47,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
return
generated_texts
return
generated_texts
@
create_new_process_for_each_test
()
def
test_ilama_lora
(
ilama_lora_files
):
def
test_ilama_lora
(
ilama_lora_files
):
llm
=
vllm
.
LLM
(
MODEL_PATH
,
llm
=
vllm
.
LLM
(
MODEL_PATH
,
max_model_len
=
1024
,
max_model_len
=
1024
,
...
@@ -63,6 +65,8 @@ def test_ilama_lora(ilama_lora_files):
...
@@ -63,6 +65,8 @@ def test_ilama_lora(ilama_lora_files):
assert
output2
[
i
]
==
EXPECTED_LORA_OUTPUT
[
i
]
assert
output2
[
i
]
==
EXPECTED_LORA_OUTPUT
[
i
]
@
pytest
.
mark
.
skipif
(
current_platform
.
is_cuda_alike
(),
reason
=
"Skipping to avoid redundant model tests"
)
@
multi_gpu_test
(
num_gpus
=
4
)
@
multi_gpu_test
(
num_gpus
=
4
)
@
create_new_process_for_each_test
()
@
create_new_process_for_each_test
()
def
test_ilama_lora_tp4
(
ilama_lora_files
):
def
test_ilama_lora_tp4
(
ilama_lora_files
):
...
@@ -84,6 +88,8 @@ def test_ilama_lora_tp4(ilama_lora_files):
...
@@ -84,6 +88,8 @@ def test_ilama_lora_tp4(ilama_lora_files):
assert
output2
[
i
]
==
EXPECTED_LORA_OUTPUT
[
i
]
assert
output2
[
i
]
==
EXPECTED_LORA_OUTPUT
[
i
]
@
pytest
.
mark
.
skipif
(
current_platform
.
is_cuda_alike
(),
reason
=
"Skipping to avoid redundant model tests"
)
@
multi_gpu_test
(
num_gpus
=
4
)
@
multi_gpu_test
(
num_gpus
=
4
)
@
create_new_process_for_each_test
()
@
create_new_process_for_each_test
()
def
test_ilama_lora_tp4_fully_sharded_loras
(
ilama_lora_files
):
def
test_ilama_lora_tp4_fully_sharded_loras
(
ilama_lora_files
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment