Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cc7f22a8
Commit
cc7f22a8
authored
Jun 11, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.9.1' into v0.9.1-ori
parents
b9ea0c09
b6553be1
Changes
1000
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
37 additions
and
29 deletions
+37
-29
tests/lora/test_tokenizer_group.py
tests/lora/test_tokenizer_group.py
+1
-0
tests/lora/test_transfomers_model.py
tests/lora/test_transfomers_model.py
+1
-0
tests/lora/test_utils.py
tests/lora/test_utils.py
+1
-0
tests/lora/test_worker.py
tests/lora/test_worker.py
+10
-17
tests/lora/utils.py
tests/lora/utils.py
+1
-0
tests/metrics/test_metrics.py
tests/metrics/test_metrics.py
+1
-0
tests/mistral_tool_use/conftest.py
tests/mistral_tool_use/conftest.py
+1
-0
tests/mistral_tool_use/test_mistral_tool_calls.py
tests/mistral_tool_use/test_mistral_tool_calls.py
+1
-0
tests/mistral_tool_use/utils.py
tests/mistral_tool_use/utils.py
+1
-0
tests/model_executor/conftest.py
tests/model_executor/conftest.py
+1
-0
tests/model_executor/test_enabled_custom_ops.py
tests/model_executor/test_enabled_custom_ops.py
+1
-0
tests/model_executor/test_guided_processors.py
tests/model_executor/test_guided_processors.py
+1
-0
tests/model_executor/test_logits_processor.py
tests/model_executor/test_logits_processor.py
+1
-0
tests/model_executor/test_model_load_with_params.py
tests/model_executor/test_model_load_with_params.py
+9
-11
tests/model_executor/test_weight_utils.py
tests/model_executor/test_weight_utils.py
+1
-0
tests/models/language/generation/test_bart.py
tests/models/language/generation/test_bart.py
+1
-0
tests/models/language/generation/test_common.py
tests/models/language/generation/test_common.py
+1
-1
tests/models/language/generation/test_granite.py
tests/models/language/generation/test_granite.py
+1
-0
tests/models/language/generation/test_granitemoehybrid.py
tests/models/language/generation/test_granitemoehybrid.py
+1
-0
tests/models/language/generation/test_hybrid.py
tests/models/language/generation/test_hybrid.py
+1
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
tests/lora/test_tokenizer_group.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
from
transformers
import
AutoTokenizer
,
PreTrainedTokenizerBase
...
...
tests/lora/test_transfomers_model.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
...
...
tests/lora/test_utils.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
collections
import
OrderedDict
from
typing
import
NamedTuple
,
Optional
...
...
tests/lora/test_worker.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
os
import
random
...
...
@@ -6,8 +7,6 @@ import tempfile
from
typing
import
Union
from
unittest.mock
import
patch
import
pytest
import
vllm.envs
as
envs
from
vllm.config
import
(
CacheConfig
,
DeviceConfig
,
LoadConfig
,
LoRAConfig
,
ModelConfig
,
ParallelConfig
,
SchedulerConfig
,
...
...
@@ -17,13 +16,7 @@ from vllm.lora.request import LoRARequest
from
vllm.v1.worker.gpu_worker
import
Worker
as
V1Worker
from
vllm.worker.worker
import
Worker
@
pytest
.
fixture
(
autouse
=
True
)
def
v1
(
run_with_both_engines_lora
):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
NUM_LORAS
=
16
@
patch
.
dict
(
os
.
environ
,
{
"RANK"
:
"0"
})
...
...
@@ -67,12 +60,12 @@ def test_worker_apply_lora(sql_lora_files):
device_config
=
DeviceConfig
(
"cuda"
),
cache_config
=
CacheConfig
(
block_size
=
16
,
gpu_memory_utilization
=
1.0
,
swap_space
=
0
,
cache_dtype
=
"auto"
,
),
lora_config
=
LoRAConfig
(
max_lora_rank
=
8
,
max_cpu_loras
=
32
,
max_loras
=
32
),
lora_config
=
LoRAConfig
(
max_lora_rank
=
8
,
max_cpu_loras
=
NUM_LORAS
,
max_loras
=
NUM_LORAS
),
)
worker
=
worker_cls
(
vllm_config
=
vllm_config
,
...
...
@@ -87,9 +80,9 @@ def test_worker_apply_lora(sql_lora_files):
set_active_loras
(
worker
,
[])
assert
worker
.
list_loras
()
==
set
()
n_loras
=
32
lora_requests
=
[
LoRARequest
(
str
(
i
+
1
),
i
+
1
,
sql_lora_files
)
for
i
in
range
(
n_loras
)
LoRARequest
(
str
(
i
+
1
),
i
+
1
,
sql_lora_files
)
for
i
in
range
(
NUM_LORAS
)
]
set_active_loras
(
worker
,
lora_requests
)
...
...
@@ -98,12 +91,12 @@ def test_worker_apply_lora(sql_lora_files):
for
lora_request
in
lora_requests
}
for
i
in
range
(
32
):
for
i
in
range
(
NUM_LORAS
):
random
.
seed
(
i
)
iter_lora_requests
=
random
.
choices
(
lora_requests
,
k
=
random
.
randint
(
1
,
n_loras
))
k
=
random
.
randint
(
1
,
NUM_LORAS
))
random
.
shuffle
(
iter_lora_requests
)
iter_lora_requests
=
iter_lora_requests
[:
-
random
.
randint
(
0
,
n_loras
)]
iter_lora_requests
=
iter_lora_requests
[:
-
random
.
randint
(
0
,
NUM_LORAS
)]
set_active_loras
(
worker
,
lora_requests
)
assert
worker
.
list_loras
().
issuperset
(
{
lora_request
.
lora_int_id
...
...
tests/lora/utils.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
dataclasses
import
dataclass
from
typing
import
Optional
,
Union
...
...
tests/metrics/test_metrics.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
time
...
...
tests/mistral_tool_use/conftest.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
import
pytest_asyncio
...
...
tests/mistral_tool_use/test_mistral_tool_calls.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
openai
import
pytest
...
...
tests/mistral_tool_use/utils.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
Optional
...
...
tests/model_executor/conftest.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
...
...
tests/model_executor/test_enabled_custom_ops.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
import
torch
...
...
tests/model_executor/test_guided_processors.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
json
import
pickle
...
...
tests/test_logits_processor.py
→
tests/
model_executor/
test_logits_processor.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
random
from
unittest.mock
import
patch
...
...
tests/model_executor/test_model_load_with_params.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
os
import
pytest
from
vllm.model_executor.layers.pooler
import
CLSPool
,
PoolingType
from
vllm.model_executor.layers.pooler
import
CLSPool
,
MeanPool
,
PoolingType
from
vllm.model_executor.models.bert
import
BertEmbeddingModel
from
vllm.model_executor.models.roberta
import
RobertaEmbeddingModel
from
vllm.platforms
import
current_platform
...
...
@@ -14,7 +15,7 @@ MODEL_NAME = os.environ.get("MODEL_NAME", "BAAI/bge-base-en-v1.5")
REVISION
=
os
.
environ
.
get
(
"REVISION"
,
"main"
)
MODEL_NAME_ROBERTA
=
os
.
environ
.
get
(
"MODEL_NAME"
,
"intfloat/multilingual-e5-
small
"
)
"intfloat/multilingual-e5-
base
"
)
REVISION_ROBERTA
=
os
.
environ
.
get
(
"REVISION"
,
"main"
)
...
...
@@ -40,17 +41,15 @@ def test_model_loading_with_params(vllm_runner):
# asserts on the pooling config files
assert
model_config
.
pooler_config
.
pooling_type
==
PoolingType
.
CLS
.
name
assert
model_config
.
pooler_config
.
pooling_norm
assert
model_config
.
pooler_config
.
normalize
# asserts on the tokenizer loaded
assert
model_tokenizer
.
tokenizer_id
==
"BAAI/bge-base-en-v1.5"
assert
model_tokenizer
.
tokenizer_config
[
"do_lower_case"
]
assert
model_tokenizer
.
tokenizer
.
model_max_length
==
512
def
check_model
(
model
):
assert
isinstance
(
model
,
BertEmbeddingModel
)
assert
model
.
_pooler
.
pooling_type
==
PoolingType
.
CLS
assert
model
.
_pooler
.
normalize
assert
isinstance
(
model
.
_pooler
,
CLSPool
)
vllm_model
.
apply_model
(
check_model
)
...
...
@@ -80,16 +79,15 @@ def test_roberta_model_loading_with_params(vllm_runner):
# asserts on the pooling config files
assert
model_config
.
pooler_config
.
pooling_type
==
PoolingType
.
MEAN
.
name
assert
model_config
.
pooler_config
.
pooling_norm
assert
model_config
.
pooler_config
.
normalize
# asserts on the tokenizer loaded
assert
model_tokenizer
.
tokenizer_id
==
"intfloat/multilingual-e5-
small
"
assert
not
model_tokenizer
.
tokenizer
_config
[
"do_lower_case"
]
assert
model_tokenizer
.
tokenizer_id
==
"intfloat/multilingual-e5-
base
"
assert
model_tokenizer
.
tokenizer
.
model_max_length
==
512
def
check_model
(
model
):
assert
isinstance
(
model
,
RobertaEmbeddingModel
)
assert
model
.
_pooler
.
pooling_type
==
PoolingType
.
MEAN
assert
model
.
_pooler
.
normalize
assert
isinstance
(
model
.
_pooler
,
MeanPool
)
vllm_model
.
apply_model
(
check_model
)
...
...
tests/model_executor/weight_utils.py
→
tests/model_executor/
test_
weight_utils.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
os
import
tempfile
...
...
tests/models/language/generation/test_bart.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
typing
import
Optional
import
pytest
...
...
tests/models/language/generation/test_common.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
os
from
typing
import
Optional
...
...
@@ -86,7 +87,6 @@ AITER_MODEL_LIST = [
pytest
.
param
(
"bigcode/starcoder2-3b"
),
# starcoder2
pytest
.
param
(
"TitanML/tiny-mixtral"
,
# mixtral
marks
=
[
pytest
.
mark
.
cpu_model
],
)
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
32
])
...
...
tests/models/language/generation/test_granite.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
from
...utils
import
check_logprobs_close
...
...
tests/models/language/generation/test_granitemoehybrid.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
...
...
tests/models/language/generation/test_hybrid.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
...
...
Prev
1
…
21
22
23
24
25
26
27
28
29
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment