Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cc7f22a8
Commit
cc7f22a8
authored
Jun 11, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.9.1' into v0.9.1-ori
parents
b9ea0c09
b6553be1
Changes
1000
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
60 additions
and
56 deletions
+60
-56
tests/models/quantization/test_nvfp4.py
tests/models/quantization/test_nvfp4.py
+1
-0
tests/models/registry.py
tests/models/registry.py
+26
-33
tests/models/test_initialization.py
tests/models/test_initialization.py
+14
-0
tests/models/test_oot_registration.py
tests/models/test_oot_registration.py
+1
-0
tests/models/test_registry.py
tests/models/test_registry.py
+1
-0
tests/models/test_transformers.py
tests/models/test_transformers.py
+1
-0
tests/models/test_utils.py
tests/models/test_utils.py
+1
-0
tests/models/test_vision.py
tests/models/test_vision.py
+1
-0
tests/models/utils.py
tests/models/utils.py
+3
-23
tests/mq_llm_engine/conftest.py
tests/mq_llm_engine/conftest.py
+1
-0
tests/mq_llm_engine/test_abort.py
tests/mq_llm_engine/test_abort.py
+1
-0
tests/mq_llm_engine/test_error_handling.py
tests/mq_llm_engine/test_error_handling.py
+1
-0
tests/mq_llm_engine/test_load.py
tests/mq_llm_engine/test_load.py
+1
-0
tests/mq_llm_engine/utils.py
tests/mq_llm_engine/utils.py
+1
-0
tests/multi_step/test_correctness_async_llm.py
tests/multi_step/test_correctness_async_llm.py
+1
-0
tests/multi_step/test_correctness_llm.py
tests/multi_step/test_correctness_llm.py
+1
-0
tests/multimodal/test_hasher.py
tests/multimodal/test_hasher.py
+1
-0
tests/multimodal/test_image.py
tests/multimodal/test_image.py
+1
-0
tests/multimodal/test_inputs.py
tests/multimodal/test_inputs.py
+1
-0
tests/multimodal/test_processing.py
tests/multimodal/test_processing.py
+1
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
tests/models/quantization/test_nvfp4.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# flake8: noqa
"""Tests Model Optimizer nvfp4 models against ground truth generation
Note: these tests will only pass on B200
...
...
tests/models/registry.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
collections.abc
import
Mapping
,
Set
from
dataclasses
import
dataclass
,
field
...
...
@@ -159,17 +160,12 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"Fairseq2LlamaForCausalLM"
:
_HfExamplesInfo
(
"mgleize/fairseq2-dummy-Llama-3.2-1B"
),
# noqa: E501
"FalconForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-7b"
),
"FalconH1ForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/Falcon-H1-1.5B-Instruct"
,
is_available_online
=
False
,
min_transformers_version
=
"4.52.2"
),
min_transformers_version
=
"4.53"
),
"GemmaForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-1.1-2b-it"
),
"Gemma2ForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-2-9b"
),
"Gemma3ForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-3-1b-it"
),
"GlmForCausalLM"
:
_HfExamplesInfo
(
"THUDM/glm-4-9b-chat-hf"
),
"Glm4ForCausalLM"
:
_HfExamplesInfo
(
"THUDM/GLM-4-32B-0414"
,
is_available_online
=
False
,
min_transformers_version
=
"4.52.dev0"
),
"Glm4ForCausalLM"
:
_HfExamplesInfo
(
"THUDM/GLM-4-9B-0414"
),
"GPT2LMHeadModel"
:
_HfExamplesInfo
(
"openai-community/gpt2"
,
{
"alias"
:
"gpt2"
}),
"GPTBigCodeForCausalLM"
:
_HfExamplesInfo
(
"bigcode/starcoder"
,
...
...
@@ -180,8 +176,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
{
"1b"
:
"EleutherAI/pythia-1.4b"
}),
"GraniteForCausalLM"
:
_HfExamplesInfo
(
"ibm/PowerLM-3b"
),
"GraniteMoeForCausalLM"
:
_HfExamplesInfo
(
"ibm/PowerMoE-3b"
),
"GraniteMoeHybridForCausalLM"
:
_HfExamplesInfo
(
"ibm-granite/granite-4.0-tiny-preview"
,
# noqa: E501
min_transformers_version
=
"4.52.0"
),
# noqa: E501
"GraniteMoeHybridForCausalLM"
:
_HfExamplesInfo
(
"ibm-granite/granite-4.0-tiny-preview"
),
# noqa: E501
"GraniteMoeSharedForCausalLM"
:
_HfExamplesInfo
(
"ibm-research/moe-7b-1b-active-shared-experts"
),
# noqa: E501
"Grok1ModelForCausalLM"
:
_HfExamplesInfo
(
"hpcai-tech/grok-1"
,
trust_remote_code
=
True
),
...
...
@@ -202,8 +197,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"LLaMAForCausalLM"
:
_HfExamplesInfo
(
"decapoda-research/llama-7b-hf"
,
is_available_online
=
False
),
"MambaForCausalLM"
:
_HfExamplesInfo
(
"state-spaces/mamba-130m-hf"
),
"Mamba2ForCausalLM"
:
_HfExamplesInfo
(
"mistralai/Mamba-Codestral-7B-v0.1"
,
is_available_online
=
False
),
"Mamba2ForCausalLM"
:
_HfExamplesInfo
(
"mistralai/Mamba-Codestral-7B-v0.1"
),
"FalconMambaForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-mamba-7b-instruct"
),
# noqa: E501
"MiniCPMForCausalLM"
:
_HfExamplesInfo
(
"openbmb/MiniCPM-2B-sft-bf16"
,
trust_remote_code
=
True
),
...
...
@@ -218,6 +212,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"MptForCausalLM"
:
_HfExamplesInfo
(
"mpt"
,
is_available_online
=
False
),
"MPTForCausalLM"
:
_HfExamplesInfo
(
"mosaicml/mpt-7b"
),
"NemotronForCausalLM"
:
_HfExamplesInfo
(
"nvidia/Minitron-8B-Base"
),
"NemotronHForCausalLM"
:
_HfExamplesInfo
(
"nvidia/Nemotron-H-8B-Base-8K"
,
trust_remote_code
=
True
),
"OlmoForCausalLM"
:
_HfExamplesInfo
(
"allenai/OLMo-1B-hf"
),
"Olmo2ForCausalLM"
:
_HfExamplesInfo
(
"allenai/OLMo-2-0425-1B"
),
"OlmoeForCausalLM"
:
_HfExamplesInfo
(
"allenai/OLMoE-1B-7B-0924-Instruct"
),
...
...
@@ -242,10 +238,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"Qwen2MoeForCausalLM"
:
_HfExamplesInfo
(
"Qwen/Qwen1.5-MoE-A2.7B-Chat"
),
"Qwen3ForCausalLM"
:
_HfExamplesInfo
(
"Qwen/Qwen3-8B"
),
"Qwen3MoeForCausalLM"
:
_HfExamplesInfo
(
"Qwen/Qwen3-30B-A3B"
),
"RWForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-40b"
,
is_available_online
=
False
),
"RWForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-40b"
),
"StableLMEpochForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-zephyr-3b"
,
# noqa: E501
is_available_online
=
Fals
e
),
v0_only
=
Tru
e
),
"StableLmForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-3b-4e1t"
,
v0_only
=
True
),
"Starcoder2ForCausalLM"
:
_HfExamplesInfo
(
"bigcode/starcoder2-3b"
),
...
...
@@ -255,7 +250,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"TeleFLMForCausalLM"
:
_HfExamplesInfo
(
"CofeAI/FLM-2-52B-Instruct-2407"
,
trust_remote_code
=
True
),
"XverseForCausalLM"
:
_HfExamplesInfo
(
"xverse/XVERSE-7B-Chat"
,
is_available_online
=
False
,
tokenizer
=
"meta-llama/Llama-2-7b"
,
trust_remote_code
=
True
),
"Zamba2ForCausalLM"
:
_HfExamplesInfo
(
"Zyphra/Zamba2-7B-instruct"
),
"MiMoForCausalLM"
:
_HfExamplesInfo
(
"XiaomiMiMo/MiMo-7B-RL"
,
...
...
@@ -274,8 +269,7 @@ _EMBEDDING_EXAMPLE_MODELS = {
trust_remote_code
=
True
),
"GteNewModel"
:
_HfExamplesInfo
(
"Alibaba-NLP/gte-base-en-v1.5"
,
trust_remote_code
=
True
,
hf_overrides
=
{
"architectures"
:
[
"GteNewModel"
]}),
hf_overrides
=
{
"architectures"
:
[
"GteNewModel"
]}),
# noqa: E501
"InternLM2ForRewardModel"
:
_HfExamplesInfo
(
"internlm/internlm2-1_8b-reward"
,
trust_remote_code
=
True
),
"JambaForSequenceClassification"
:
_HfExamplesInfo
(
"ai21labs/Jamba-tiny-reward-dev"
),
# noqa: E501
...
...
@@ -283,7 +277,7 @@ _EMBEDDING_EXAMPLE_MODELS = {
"MistralModel"
:
_HfExamplesInfo
(
"intfloat/e5-mistral-7b-instruct"
),
"ModernBertModel"
:
_HfExamplesInfo
(
"Alibaba-NLP/gte-modernbert-base"
,
trust_remote_code
=
True
),
"NomicBertModel"
:
_HfExamplesInfo
(
"
S
no
wflake/snowflake-arctic-embed-m-long"
,
# noqa: E501
"NomicBertModel"
:
_HfExamplesInfo
(
"no
mic-ai/nomic-embed-text-v2-moe"
,
trust_remote_code
=
True
),
"Qwen2Model"
:
_HfExamplesInfo
(
"ssmits/Qwen2-7B-Instruct-embed-base"
),
"Qwen2ForRewardModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Math-RM-72B"
),
...
...
@@ -297,10 +291,8 @@ _EMBEDDING_EXAMPLE_MODELS = {
"Phi3VForCausalLM"
:
_HfExamplesInfo
(
"TIGER-Lab/VLM2Vec-Full"
,
trust_remote_code
=
True
),
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
"MrLight/dse-qwen2-2b-mrl-v1"
),
# noqa: E501
# The model on Huggingface is currently being updated,
# hence I temporarily mark it as not available online
"PrithviGeoSpatialMAE"
:
_HfExamplesInfo
(
"ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
,
# noqa: E501
is_available_online
=
False
),
is_available_online
=
False
),
# noqa: E501
}
_CROSS_ENCODER_EXAMPLE_MODELS
=
{
...
...
@@ -326,8 +318,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
hf_overrides
=
{
"architectures"
:
[
"DeepseekVLV2ForCausalLM"
]}),
# noqa: E501
"FuyuForCausalLM"
:
_HfExamplesInfo
(
"adept/fuyu-8b"
),
"Gemma3ForConditionalGeneration"
:
_HfExamplesInfo
(
"google/gemma-3-4b-it"
),
"GraniteSpeechForConditionalGeneration"
:
_HfExamplesInfo
(
"ibm-granite/granite-speech-3.3-8b"
,
# noqa: E501
min_transformers_version
=
"4.52.0"
),
# noqa: E501
"GraniteSpeechForConditionalGeneration"
:
_HfExamplesInfo
(
"ibm-granite/granite-speech-3.3-2b"
),
# noqa: E501
"GLM4VForCausalLM"
:
_HfExamplesInfo
(
"THUDM/glm-4v-9b"
,
trust_remote_code
=
True
,
hf_overrides
=
{
"architectures"
:
[
"GLM4VForCausalLM"
]}),
# noqa: E501
...
...
@@ -346,7 +337,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
trust_remote_code
=
True
,
v0_only
=
True
),
"Llama4ForConditionalGeneration"
:
_HfExamplesInfo
(
"meta-llama/Llama-4-Scout-17B-16E-Instruct"
,
# noqa: E501
min_transformers_version
=
"4.51"
,
max_model_len
=
10240
),
"LlavaForConditionalGeneration"
:
_HfExamplesInfo
(
"llava-hf/llava-1.5-7b-hf"
,
extras
=
{
"mistral"
:
"mistral-community/pixtral-12b"
,
# noqa: E501
...
...
@@ -359,8 +349,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
transformers_version_reason
=
"HF model is not compatible."
,
# noqa: E501
hf_overrides
=
{
"architectures"
:
[
"MantisForConditionalGeneration"
]}),
# noqa: E501
"MiniCPMO"
:
_HfExamplesInfo
(
"openbmb/MiniCPM-o-2_6"
,
max_transformers_version
=
"4.48"
,
transformers_version_reason
=
"Use of deprecated imports which have been removed."
,
# noqa: E501
trust_remote_code
=
True
),
"MiniCPMV"
:
_HfExamplesInfo
(
"openbmb/MiniCPM-Llama3-V-2_5"
,
extras
=
{
"2.6"
:
"openbmb/MiniCPM-V-2_6"
},
# noqa: E501
...
...
@@ -398,20 +386,20 @@ _MULTIMODAL_EXAMPLE_MODELS = {
"Qwen2AudioForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-Audio-7B-Instruct"
),
# noqa: E501
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-VL-2B-Instruct"
),
# noqa: E501
"Qwen2_5_VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-VL-3B-Instruct"
),
# noqa: E501
"Qwen2_5OmniModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-3B"
,
min_transformers_version
=
"4.52"
),
"Qwen2_5OmniForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-7B-AWQ"
,
# noqa: E501
min_transformers_version
=
"4.52"
),
"Qwen2_5OmniModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-3B"
),
"Qwen2_5OmniForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-7B-AWQ"
),
# noqa: E501
"SkyworkR1VChatModel"
:
_HfExamplesInfo
(
"Skywork/Skywork-R1V-38B"
),
"SmolVLMForConditionalGeneration"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
),
# noqa: E501
"UltravoxModel"
:
_HfExamplesInfo
(
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
# noqa: E501
trust_remote_code
=
True
),
"TarsierForConditionalGeneration"
:
_HfExamplesInfo
(
"omni-research/Tarsier-7b"
,
# noqa: E501
hf_overrides
=
{
"architectures"
:
[
"TarsierForConditionalGeneration"
]}),
# noqa: E501
# [Encoder-decoder]
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
# Therefore, we borrow the BartTokenizer from the original Bart model
"Florence2ForConditionalGeneration"
:
_HfExamplesInfo
(
"microsoft/Florence-2-base"
,
# noqa: E501
tokenizer
=
"Isotr0py/Florence-2-tokenizer"
,
trust_remote_code
=
True
,
),
# noqa: E501
tokenizer
=
"Isotr0py/Florence-2-tokenizer"
,
# noqa: E501
trust_remote_code
=
True
),
# noqa: E501
"MllamaForConditionalGeneration"
:
_HfExamplesInfo
(
"meta-llama/Llama-3.2-11B-Vision-Instruct"
),
# noqa: E501
"Llama4ForConditionalGeneration"
:
_HfExamplesInfo
(
"meta-llama/Llama-4-Scout-17B-16E-Instruct"
),
# noqa: E501
"WhisperForConditionalGeneration"
:
_HfExamplesInfo
(
"openai/whisper-large-v3"
),
# noqa: E501
...
...
@@ -435,6 +423,11 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
trust_remote_code
=
True
,
speculative_model
=
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
,
tokenizer
=
"meta-llama/Llama-3.1-8B-Instruct"
),
"EagleMiniCPMForCausalLM"
:
_HfExamplesInfo
(
"openbmb/MiniCPM-1B-sft-bf16"
,
trust_remote_code
=
True
,
is_available_online
=
False
,
speculative_model
=
"openbmb/MiniCPM-2B-sft-bf16"
,
tokenizer
=
"openbmb/MiniCPM-2B-sft-bf16"
),
"MiMoMTPModel"
:
_HfExamplesInfo
(
"XiaomiMiMo/MiMo-7B-RL"
,
trust_remote_code
=
True
,
speculative_model
=
"XiaomiMiMo/MiMo-7B-RL"
)
...
...
tests/models/test_initialization.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
unittest.mock
import
patch
...
...
@@ -20,6 +21,10 @@ def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_transformers_version
(
on_fail
=
"skip"
)
# FIXME: Possible memory leak in the previous tests?
if
model_arch
==
"GraniteSpeechForConditionalGeneration"
:
pytest
.
skip
(
"Avoid OOM"
)
# Avoid OOM and reduce initialization time by only using 1 layer
def
hf_overrides
(
hf_config
:
PretrainedConfig
)
->
PretrainedConfig
:
hf_config
.
update
(
model_info
.
hf_overrides
)
...
...
@@ -40,6 +45,13 @@ def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
"num_hidden_layers"
:
1
,
})
# e.g.: ibm-granite/granite-speech-3.3-2b
if
hasattr
(
hf_config
,
"encoder_config"
):
hf_config
.
encoder_config
.
update
({
"num_layers"
:
1
,
"num_hidden_layers"
:
1
,
})
return
hf_config
# Avoid calling model.forward()
...
...
@@ -74,6 +86,8 @@ def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
}
if
model_info
.
speculative_model
else
None
,
trust_remote_code
=
model_info
.
trust_remote_code
,
max_model_len
=
model_info
.
max_model_len
,
# these tests seem to produce leftover memory
gpu_memory_utilization
=
0.80
,
load_format
=
"dummy"
,
hf_overrides
=
hf_overrides
,
)
tests/models/test_oot_registration.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
...
...
tests/models/test_registry.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
warnings
...
...
tests/models/test_transformers.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Test the functionality of the Transformers backend."""
from
typing
import
Any
,
Optional
,
Union
...
...
tests/models/test_utils.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
torch
...
...
tests/models/test_vision.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
import
torch
...
...
tests/models/utils.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
warnings
from
collections.abc
import
Sequence
from
typing
import
TYPE_CHECKING
,
Any
,
NamedTuple
,
Optional
,
Union
from
typing
import
Any
,
NamedTuple
,
Optional
,
Union
import
torch
import
torch.nn.functional
as
F
...
...
@@ -13,9 +14,6 @@ from vllm.sequence import Logprob, PromptLogprobs, SampleLogprobs
from
.registry
import
HF_EXAMPLE_MODELS
if
TYPE_CHECKING
:
from
..conftest
import
HfRunner
TokensText
=
tuple
[
list
[
int
],
str
]
...
...
@@ -317,6 +315,7 @@ def check_embeddings_close(
dim
=
0
)
fail_msg
=
(
f
"Test
{
prompt_idx
}
:"
f
"
\n
Cosine similarity:
\t
{
sim
:.
4
f
}
"
f
"
\n
{
name_0
}
:
\t
{
embeddings_0
[:
16
]
!
r
}
"
f
"
\n
{
name_1
}
:
\t
{
embeddings_1
[:
16
]
!
r
}
"
)
...
...
@@ -337,22 +336,3 @@ class EmbedModelInfo(NamedTuple):
architecture
:
str
=
""
dtype
:
str
=
"auto"
enable_test
:
bool
=
True
def
run_embedding_correctness_test
(
hf_model
:
"HfRunner"
,
inputs
:
list
[
str
],
vllm_outputs
:
Sequence
[
list
[
float
]],
dimensions
:
Optional
[
int
]
=
None
,
):
hf_outputs
=
hf_model
.
encode
(
inputs
)
if
dimensions
:
hf_outputs
=
matryoshka_fy
(
hf_outputs
,
dimensions
)
check_embeddings_close
(
embeddings_0_lst
=
hf_outputs
,
embeddings_1_lst
=
vllm_outputs
,
name_0
=
"hf"
,
name_1
=
"vllm"
,
tol
=
1e-2
,
)
tests/mq_llm_engine/conftest.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
pytest
...
...
tests/mq_llm_engine/test_abort.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Test that aborting is handled properly."""
import
asyncio
...
...
tests/mq_llm_engine/test_error_handling.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Test that various errors are handled properly."""
import
asyncio
...
...
tests/mq_llm_engine/test_load.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Test that the MQLLMEngine is able to handle 10k concurrent requests."""
import
asyncio
...
...
tests/mq_llm_engine/utils.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
asyncio
import
multiprocessing
...
...
tests/multi_step/test_correctness_async_llm.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# Test the AsyncLLMEngine with multi-step-decoding
from
typing
import
Optional
...
...
tests/multi_step/test_correctness_llm.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# Test the LLMEngine with multi-step-decoding
...
...
tests/multimodal/test_hasher.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
pathlib
import
Path
import
numpy
as
np
...
...
tests/multimodal/test_image.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
pathlib
import
Path
import
numpy
as
np
...
...
tests/multimodal/test_inputs.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
torch
...
...
tests/multimodal/test_processing.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
contextlib
import
nullcontext
from
types
import
MethodType
...
...
Prev
1
…
25
26
27
28
29
30
31
32
33
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment