Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0da93439
Commit
0da93439
authored
Mar 26, 2026
by
zhuwenwen
Browse files
Merge tag 'v0.18.1rc0' into v0.18.1rc0-ori
parents
25f2f756
298e5108
Changes
613
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
240 additions
and
45 deletions
+240
-45
tests/entrypoints/openai/speech_to_text/__init__.py
tests/entrypoints/openai/speech_to_text/__init__.py
+0
-0
tests/entrypoints/openai/speech_to_text/test_transcription_validation.py
...ts/openai/speech_to_text/test_transcription_validation.py
+3
-3
tests/entrypoints/openai/speech_to_text/test_transcription_validation_whisper.py
...i/speech_to_text/test_transcription_validation_whisper.py
+1
-1
tests/entrypoints/openai/speech_to_text/test_translation_validation.py
...ints/openai/speech_to_text/test_translation_validation.py
+2
-2
tests/entrypoints/openai/test_cli_args.py
tests/entrypoints/openai/test_cli_args.py
+29
-0
tests/entrypoints/openai/test_multi_api_servers.py
tests/entrypoints/openai/test_multi_api_servers.py
+0
-0
tests/entrypoints/openai/test_run_batch.py
tests/entrypoints/openai/test_run_batch.py
+1
-1
tests/entrypoints/openai/tool_parsers/test_gigachat3_tool_parser.py
...ypoints/openai/tool_parsers/test_gigachat3_tool_parser.py
+194
-27
tests/entrypoints/openai/tool_parsers/test_hunyuan_a13b_tool_parser.py
...ints/openai/tool_parsers/test_hunyuan_a13b_tool_parser.py
+1
-1
tests/entrypoints/openai/tool_parsers/test_llama4_pythonic_tool_parser.py
...s/openai/tool_parsers/test_llama4_pythonic_tool_parser.py
+1
-1
tests/entrypoints/openai/tool_parsers/test_olmo3_tool_parser.py
...entrypoints/openai/tool_parsers/test_olmo3_tool_parser.py
+1
-1
tests/entrypoints/openai/tool_parsers/test_pythonic_tool_parser.py
...rypoints/openai/tool_parsers/test_pythonic_tool_parser.py
+1
-1
tests/entrypoints/pooling/embed/test_cohere_online.py
tests/entrypoints/pooling/embed/test_cohere_online.py
+1
-1
tests/entrypoints/pooling/embed/test_cohere_online_vision.py
tests/entrypoints/pooling/embed/test_cohere_online_vision.py
+1
-1
tests/entrypoints/pooling/embed/test_online.py
tests/entrypoints/pooling/embed/test_online.py
+1
-1
tests/entrypoints/pooling/embed/test_protocol.py
tests/entrypoints/pooling/embed/test_protocol.py
+1
-1
tests/entrypoints/pooling/pooling/test_online.py
tests/entrypoints/pooling/pooling/test_online.py
+1
-1
tests/entrypoints/serve/__init__.py
tests/entrypoints/serve/__init__.py
+0
-0
tests/entrypoints/serve/disagg/__init__.py
tests/entrypoints/serve/disagg/__init__.py
+0
-0
tests/entrypoints/serve/disagg/test_serving_tokens.py
tests/entrypoints/serve/disagg/test_serving_tokens.py
+1
-2
No files found.
Too many changes to show.
To preserve performance only
613 of 613+
files are displayed.
Plain diff
Email patch
tests/
v1/
entrypoints/
llm
/__init__.py
→
tests/entrypoints/
openai/speech_to_text
/__init__.py
View file @
0da93439
File moved
tests/entrypoints/openai/test_transcription_validation.py
→
tests/entrypoints/openai/
speech_to_text/
test_transcription_validation.py
View file @
0da93439
...
...
@@ -6,8 +6,8 @@ import json
import
pytest
from
...utils
import
ROCM_ENV_OVERRIDES
,
ROCM_EXTRA_ARGS
,
RemoteOpenAIServer
from
.conftest
import
add_attention_backend
from
tests.entrypoints.openai.conftest
import
add_attention_backend
from
tests.utils
import
ROCM_ENV_OVERRIDES
,
ROCM_EXTRA_ARGS
,
RemoteOpenAIServer
MISTRAL_FORMAT_ARGS
=
[
"--tokenizer_mode"
,
...
...
@@ -152,5 +152,5 @@ async def test_basic_audio_foscolo(foscolo, rocm_aiter_fa_attention, model_name)
model_name
,
foscolo
,
language
=
"it"
,
expected_text
=
"ove il mio corpo fanciulletto
giacque
"
,
expected_text
=
"ove il mio corpo fanciulletto"
,
)
tests/entrypoints/openai/test_transcription_validation_whisper.py
→
tests/entrypoints/openai/
speech_to_text/
test_transcription_validation_whisper.py
View file @
0da93439
...
...
@@ -13,7 +13,7 @@ import pytest
import
pytest_asyncio
import
soundfile
as
sf
from
..
.utils
import
RemoteOpenAIServer
from
tests
.utils
import
RemoteOpenAIServer
MODEL_NAME
=
"openai/whisper-large-v3-turbo"
...
...
tests/entrypoints/openai/test_translation_validation.py
→
tests/entrypoints/openai/
speech_to_text/
test_translation_validation.py
View file @
0da93439
...
...
@@ -14,8 +14,8 @@ import pytest
import
pytest_asyncio
import
soundfile
as
sf
from
...utils
import
RemoteOpenAIServer
from
.conftest
import
add_attention_backend
from
tests.entrypoints.openai.conftest
import
add_attention_backend
from
tests.utils
import
RemoteOpenAIServer
SERVER_ARGS
=
[
"--enforce-eager"
]
...
...
tests/entrypoints/openai/test_cli_args.py
View file @
0da93439
...
...
@@ -291,3 +291,32 @@ def test_served_model_name_parsing(tmp_path, vllm_parser, args, raises):
else
:
with
pytest
.
raises
(
raises
):
vllm_parser
.
parse_args
(
args
=
args
)
### Tests for LoRA target modules parsing
def
test_lora_target_modules_single
(
serve_parser
):
"""Test parsing single lora-target-modules argument"""
args
=
serve_parser
.
parse_args
(
args
=
[
"--enable-lora"
,
"--lora-target-modules"
,
"o_proj"
]
)
assert
args
.
lora_target_modules
==
[
"o_proj"
]
def
test_lora_target_modules_multiple
(
serve_parser
):
"""Test parsing multiple lora-target-modules arguments"""
args
=
serve_parser
.
parse_args
(
args
=
[
"--enable-lora"
,
"--lora-target-modules"
,
"o_proj"
,
"qkv_proj"
,
"down_proj"
,
]
)
assert
args
.
lora_target_modules
==
[
"o_proj"
,
"qkv_proj"
,
"down_proj"
]
def
test_lora_target_modules_default_none
(
serve_parser
):
"""Test that lora-target-modules defaults to None"""
args
=
serve_parser
.
parse_args
(
args
=
[])
assert
args
.
lora_target_modules
is
None
tests/
v1/
entrypoints/openai/test_multi_api_servers.py
→
tests/entrypoints/openai/test_multi_api_servers.py
View file @
0da93439
File moved
tests/entrypoints/openai/test_run_batch.py
View file @
0da93439
...
...
@@ -275,7 +275,7 @@ INPUT_REASONING_BATCH = "\n".join(
]
)
MINIMAL_WAV_BASE64
=
"UklGRi
Q
AAABXQVZFZm10IBAAAAABAAEA
QB8AAEAfAAABAAg
AZGF0YQAAAAA
=
"
MINIMAL_WAV_BASE64
=
"UklGRi
g
AAABXQVZFZm10IBAAAAABAAEA
gD4AAAB9AAACABA
AZGF0YQ
Q
AAAAA
AP9/
"
INPUT_TRANSCRIPTION_BATCH
=
(
json
.
dumps
(
{
...
...
tests/entrypoints/openai/tool_parsers/test_gigachat3_tool_parser.py
View file @
0da93439
...
...
@@ -5,7 +5,7 @@ import json
import
pytest
from
tests.
entrypoints.openai.
tool_parsers.utils
import
(
from
tests.tool_parsers.utils
import
(
run_tool_extraction
,
run_tool_extraction_streaming
,
)
...
...
@@ -13,6 +13,13 @@ from vllm.entrypoints.openai.engine.protocol import FunctionCall
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers
import
ToolParser
,
ToolParserManager
MSG_SEP_TOKEN
=
"<|message_sep|>
\n\n
"
ROLE_SEP_TOKEN
=
"<|role_sep|>
\n
"
EOS_TOKEN
=
"</s>"
TOOL_HEADER_GIGACHAT3
=
f
"function call
{
ROLE_SEP_TOKEN
}
"
TOOL_HEADER_GIGACHAT31
=
"<|function_call|>"
SIMPLE_ARGS_DICT
=
{
"action"
:
"create"
,
"id"
:
"preferences"
,
...
...
@@ -24,7 +31,10 @@ SIMPLE_FUNCTION_JSON = json.dumps(
},
ensure_ascii
=
False
,
)
SIMPLE_FUNCTION_OUTPUT
=
"function call"
+
SIMPLE_FUNCTION_JSON
SIMPLE_FUNCTION_OUTPUT_GIGACHAT3
=
(
f
"
{
MSG_SEP_TOKEN
}{
TOOL_HEADER_GIGACHAT3
}{
SIMPLE_FUNCTION_JSON
}
"
)
SIMPLE_FUNCTION_OUTPUT_GIGACHAT31
=
f
"
{
TOOL_HEADER_GIGACHAT31
}{
SIMPLE_FUNCTION_JSON
}
"
SIMPLE_FUNCTION_CALL
=
FunctionCall
(
name
=
"manage_user_memory"
,
arguments
=
json
.
dumps
(
SIMPLE_ARGS_DICT
,
ensure_ascii
=
False
),
...
...
@@ -38,7 +48,12 @@ PARAMETERLESS_FUNCTION_JSON = json.dumps(
},
ensure_ascii
=
False
,
)
PARAMETERLESS_FUNCTION_OUTPUT
=
"function call"
+
PARAMETERLESS_FUNCTION_JSON
PARAMETERLESS_FUNCTION_OUTPUT_GIGACHAT3
=
(
f
"
{
MSG_SEP_TOKEN
}{
TOOL_HEADER_GIGACHAT3
}{
PARAMETERLESS_FUNCTION_JSON
}
"
)
PARAMETERLESS_FUNCTION_OUTPUT_GIGACHAT31
=
(
f
"
{
TOOL_HEADER_GIGACHAT31
}{
PARAMETERLESS_FUNCTION_JSON
}
"
)
PARAMETERLESS_FUNCTION_CALL
=
FunctionCall
(
name
=
"manage_user_memory"
,
arguments
=
json
.
dumps
({},
ensure_ascii
=
False
),
...
...
@@ -62,17 +77,38 @@ COMPLEX_FUNCTION_JSON = json.dumps(
},
ensure_ascii
=
False
,
)
COMPLEX_FUNCTION_OUTPUT
=
"function call"
+
COMPLEX_FUNCTION_JSON
COMPLEX_FUNCTION_OUTPUT_GIGACHAT3
=
(
f
"
{
MSG_SEP_TOKEN
}{
TOOL_HEADER_GIGACHAT3
}{
COMPLEX_FUNCTION_JSON
}
"
)
COMPLEX_FUNCTION_OUTPUT_GIGACHAT31
=
f
"
{
TOOL_HEADER_GIGACHAT31
}{
COMPLEX_FUNCTION_JSON
}
"
COMPLEX_FUNCTION_CALL
=
FunctionCall
(
name
=
"manage_user_memory"
,
arguments
=
json
.
dumps
(
COMPLEX_ARGS_DICT
,
ensure_ascii
=
False
),
)
CONTENT_TEXT
=
"I'll check that for you."
MIXED_OUTPUT_GIGACHAT3
=
f
"
{
CONTENT_TEXT
}{
SIMPLE_FUNCTION_OUTPUT_GIGACHAT3
}
"
MIXED_OUTPUT_GIGACHAT31
=
f
"
{
CONTENT_TEXT
}{
SIMPLE_FUNCTION_OUTPUT_GIGACHAT31
}
"
@
pytest
.
fixture
(
name
=
"gigachat_tokenizer"
)
def
fixture_gigachat_tokenizer
(
default_tokenizer
:
TokenizerLike
):
default_tokenizer
.
add_tokens
(
[
MSG_SEP_TOKEN
,
ROLE_SEP_TOKEN
,
TOOL_HEADER_GIGACHAT31
,
EOS_TOKEN
,
]
)
return
default_tokenizer
@
pytest
.
mark
.
parametrize
(
"streaming"
,
[
True
,
False
])
def
test_no_tool_call
(
streaming
:
bool
,
defaul
t_tokenizer
:
TokenizerLike
):
def
test_no_tool_call
(
streaming
:
bool
,
gigacha
t_tokenizer
:
TokenizerLike
):
tool_parser
:
ToolParser
=
ToolParserManager
.
get_tool_parser
(
"gigachat3"
)(
defaul
t_tokenizer
gigacha
t_tokenizer
)
model_output
=
"How can I help you today?"
content
,
tool_calls
=
run_tool_extraction
(
...
...
@@ -85,45 +121,143 @@ def test_no_tool_call(streaming: bool, default_tokenizer: TokenizerLike):
TEST_CASES
=
[
pytest
.
param
(
True
,
SIMPLE_FUNCTION_OUTPUT
,
SIMPLE_FUNCTION_OUTPUT_GIGACHAT3
,
[
SIMPLE_FUNCTION_CALL
],
None
,
id
=
"simple_streaming_gigachat3"
,
),
pytest
.
param
(
False
,
SIMPLE_FUNCTION_OUTPUT_GIGACHAT3
,
[
SIMPLE_FUNCTION_CALL
],
None
,
id
=
"simple_nonstreaming_gigachat3"
,
),
pytest
.
param
(
True
,
PARAMETERLESS_FUNCTION_OUTPUT_GIGACHAT3
,
[
PARAMETERLESS_FUNCTION_CALL
],
None
,
id
=
"parameterless_streaming_gigachat3"
,
),
pytest
.
param
(
False
,
PARAMETERLESS_FUNCTION_OUTPUT_GIGACHAT3
,
[
PARAMETERLESS_FUNCTION_CALL
],
None
,
id
=
"parameterless_nonstreaming_gigachat3"
,
),
pytest
.
param
(
True
,
COMPLEX_FUNCTION_OUTPUT_GIGACHAT3
,
[
COMPLEX_FUNCTION_CALL
],
None
,
id
=
"complex_streaming_gigachat3"
,
),
pytest
.
param
(
False
,
COMPLEX_FUNCTION_OUTPUT_GIGACHAT3
,
[
COMPLEX_FUNCTION_CALL
],
None
,
id
=
"complex_nonstreaming_gigachat3"
,
),
pytest
.
param
(
True
,
MIXED_OUTPUT_GIGACHAT3
,
[
SIMPLE_FUNCTION_CALL
],
CONTENT_TEXT
,
id
=
"mixed_content_streaming_gigachat3"
,
),
pytest
.
param
(
False
,
MIXED_OUTPUT_GIGACHAT3
,
[
SIMPLE_FUNCTION_CALL
],
CONTENT_TEXT
,
id
=
"mixed_content_nonstreaming_gigachat3"
,
),
pytest
.
param
(
True
,
MIXED_OUTPUT_GIGACHAT3
+
EOS_TOKEN
,
[
SIMPLE_FUNCTION_CALL
],
CONTENT_TEXT
,
id
=
"mixed_content_streaming_with_eos_gigachat3"
,
),
pytest
.
param
(
False
,
MIXED_OUTPUT_GIGACHAT3
+
EOS_TOKEN
,
[
SIMPLE_FUNCTION_CALL
],
CONTENT_TEXT
,
id
=
"mixed_content_nonstreaming_with_eos_gigachat3"
,
),
pytest
.
param
(
True
,
SIMPLE_FUNCTION_OUTPUT_GIGACHAT31
,
[
SIMPLE_FUNCTION_CALL
],
None
,
id
=
"simple_streaming"
,
id
=
"simple_streaming
_gigachat31
"
,
),
pytest
.
param
(
False
,
SIMPLE_FUNCTION_OUTPUT
,
SIMPLE_FUNCTION_OUTPUT
_GIGACHAT31
,
[
SIMPLE_FUNCTION_CALL
],
None
,
id
=
"simple_nonstreaming"
,
id
=
"simple_nonstreaming
_gigachat31
"
,
),
pytest
.
param
(
True
,
PARAMETERLESS_FUNCTION_OUTPUT
,
PARAMETERLESS_FUNCTION_OUTPUT
_GIGACHAT31
,
[
PARAMETERLESS_FUNCTION_CALL
],
None
,
id
=
"parameterless_streaming"
,
id
=
"parameterless_streaming
_gigachat31
"
,
),
pytest
.
param
(
False
,
PARAMETERLESS_FUNCTION_OUTPUT
,
PARAMETERLESS_FUNCTION_OUTPUT
_GIGACHAT31
,
[
PARAMETERLESS_FUNCTION_CALL
],
None
,
id
=
"parameterless_nonstreaming"
,
id
=
"parameterless_nonstreaming
_gigachat31
"
,
),
pytest
.
param
(
True
,
COMPLEX_FUNCTION_OUTPUT
,
COMPLEX_FUNCTION_OUTPUT
_GIGACHAT31
,
[
COMPLEX_FUNCTION_CALL
],
None
,
id
=
"complex_streaming"
,
id
=
"complex_streaming
_gigachat31
"
,
),
pytest
.
param
(
False
,
COMPLEX_FUNCTION_OUTPUT
,
COMPLEX_FUNCTION_OUTPUT
_GIGACHAT31
,
[
COMPLEX_FUNCTION_CALL
],
None
,
id
=
"complex_nonstreaming"
,
id
=
"complex_nonstreaming_gigachat31"
,
),
pytest
.
param
(
True
,
MIXED_OUTPUT_GIGACHAT31
,
[
SIMPLE_FUNCTION_CALL
],
CONTENT_TEXT
,
id
=
"mixed_content_streaming_gigachat31"
,
),
pytest
.
param
(
False
,
MIXED_OUTPUT_GIGACHAT31
,
[
SIMPLE_FUNCTION_CALL
],
CONTENT_TEXT
,
id
=
"mixed_content_nonstreaming_gigachat31"
,
),
pytest
.
param
(
True
,
MIXED_OUTPUT_GIGACHAT31
+
EOS_TOKEN
,
[
SIMPLE_FUNCTION_CALL
],
CONTENT_TEXT
,
id
=
"mixed_content_streaming_with_eos_gigachat31"
,
),
pytest
.
param
(
False
,
MIXED_OUTPUT_GIGACHAT31
+
EOS_TOKEN
,
[
SIMPLE_FUNCTION_CALL
],
CONTENT_TEXT
,
id
=
"mixed_content_nonstreaming_with_eos_gigachat31"
,
),
]
...
...
@@ -136,14 +270,16 @@ def test_tool_call(
model_output
:
str
,
expected_tool_calls
:
list
[
FunctionCall
],
expected_content
:
str
|
None
,
defaul
t_tokenizer
:
TokenizerLike
,
gigacha
t_tokenizer
:
TokenizerLike
,
):
tool_parser
:
ToolParser
=
ToolParserManager
.
get_tool_parser
(
"gigachat3"
)(
defaul
t_tokenizer
gigacha
t_tokenizer
)
content
,
tool_calls
=
run_tool_extraction
(
tool_parser
,
model_output
,
streaming
=
streaming
)
if
content
==
""
:
content
=
None
assert
content
==
expected_content
assert
len
(
tool_calls
)
==
len
(
expected_tool_calls
)
for
actual
,
expected
in
zip
(
tool_calls
,
expected_tool_calls
):
...
...
@@ -154,15 +290,46 @@ def test_tool_call(
assert
actual_args
==
expected_args
def
test_streaming_tool_call_with_large_steps
(
default_tokenizer
:
TokenizerLike
):
@
pytest
.
mark
.
parametrize
(
"model_output_deltas"
,
[
pytest
.
param
(
[
CONTENT_TEXT
[:
3
],
CONTENT_TEXT
[
3
:
5
],
CONTENT_TEXT
[
5
:],
MSG_SEP_TOKEN
,
TOOL_HEADER_GIGACHAT3
,
COMPLEX_FUNCTION_JSON
[:
40
],
COMPLEX_FUNCTION_JSON
[
40
:
-
1
],
COMPLEX_FUNCTION_JSON
[
-
1
],
],
id
=
"gigachat3"
,
),
pytest
.
param
(
[
CONTENT_TEXT
[:
3
],
CONTENT_TEXT
[
3
:
5
],
CONTENT_TEXT
[
5
:],
TOOL_HEADER_GIGACHAT31
,
COMPLEX_FUNCTION_JSON
[:
40
],
COMPLEX_FUNCTION_JSON
[
40
:
-
1
],
COMPLEX_FUNCTION_JSON
[
-
1
],
],
id
=
"gigachat31"
,
),
],
)
def
test_streaming_tool_call_with_large_steps
(
model_output_deltas
:
list
[
str
],
gigachat_tokenizer
:
TokenizerLike
,
):
"""
Test that the closing braces are streamed correctly.
"""
tool_parser
:
ToolParser
=
ToolParserManager
.
get_tool_parser
(
"gigachat3"
)(
defaul
t_tokenizer
gigacha
t_tokenizer
)
model_output_deltas
=
[
"function call"
,
COMPLEX_FUNCTION_JSON
[:
40
],
COMPLEX_FUNCTION_JSON
[
40
:],
]
reconstructor
=
run_tool_extraction_streaming
(
tool_parser
,
model_output_deltas
,
...
...
tests/entrypoints/openai/tool_parsers/test_hunyuan_a13b_tool_parser.py
View file @
0da93439
...
...
@@ -7,7 +7,7 @@ from unittest.mock import MagicMock
import
pytest
from
tests.
entrypoints.openai.
tool_parsers.utils
import
(
from
tests.tool_parsers.utils
import
(
run_tool_extraction
,
run_tool_extraction_streaming
,
)
...
...
tests/entrypoints/openai/tool_parsers/test_llama4_pythonic_tool_parser.py
View file @
0da93439
...
...
@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import
pytest
from
tests.
entrypoints.openai.
tool_parsers.utils
import
(
from
tests.tool_parsers.utils
import
(
run_tool_extraction
,
run_tool_extraction_streaming
,
)
...
...
tests/entrypoints/openai/tool_parsers/test_olmo3_tool_parser.py
View file @
0da93439
...
...
@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import
pytest
from
tests.
entrypoints.openai.
tool_parsers.utils
import
(
from
tests.tool_parsers.utils
import
(
run_tool_extraction
,
run_tool_extraction_streaming
,
)
...
...
tests/entrypoints/openai/tool_parsers/test_pythonic_tool_parser.py
View file @
0da93439
...
...
@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch
import
pytest
from
tests.
entrypoints.openai.
tool_parsers.utils
import
(
from
tests.tool_parsers.utils
import
(
run_tool_extraction
,
run_tool_extraction_streaming
,
)
...
...
tests/entrypoints/pooling/embed/test_cohere_online.py
View file @
0da93439
...
...
@@ -7,10 +7,10 @@ embedding models, covering text embedding, embedding type conversions,
response structure, batching, normalisation, and semantic similarity.
"""
import
base64
import
struct
import
numpy
as
np
import
pybase64
as
base64
import
pytest
import
requests
...
...
tests/entrypoints/pooling/embed/test_cohere_online_vision.py
View file @
0da93439
...
...
@@ -6,11 +6,11 @@ Validates image embedding, batching, normalisation, and embedding type
conversions through the /v2/embed endpoint.
"""
import
base64
import
struct
import
zlib
import
numpy
as
np
import
pybase64
as
base64
import
pytest
import
requests
...
...
tests/entrypoints/pooling/embed/test_online.py
View file @
0da93439
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
base64
import
json
import
numpy
as
np
import
openai
import
pybase64
as
base64
import
pytest
import
pytest_asyncio
import
requests
...
...
tests/entrypoints/pooling/embed/test_protocol.py
View file @
0da93439
...
...
@@ -3,10 +3,10 @@
"""Unit tests for Cohere embed protocol: build_typed_embeddings and its
underlying packing helpers, plus Cohere-specific serving helpers."""
import
base64
import
struct
import
numpy
as
np
import
pybase64
as
base64
import
pytest
from
vllm.entrypoints.pooling.embed.protocol
import
(
...
...
tests/entrypoints/pooling/pooling/test_online.py
View file @
0da93439
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
base64
import
json
import
numpy
as
np
import
pybase64
as
base64
import
pytest
import
requests
import
torch
...
...
tests/
v1/
entrypoints/
openai/serving_responses
/__init__.py
→
tests/entrypoints/
serve
/__init__.py
View file @
0da93439
File moved
tests/entrypoints/serve/disagg/__init__.py
0 → 100644
View file @
0da93439
tests/entrypoints/
openai
/test_serving_tokens.py
→
tests/entrypoints/
serve/disagg
/test_serving_tokens.py
View file @
0da93439
...
...
@@ -8,12 +8,11 @@ import pytest
import
pytest_asyncio
from
transformers
import
AutoTokenizer
from
tests.utils
import
RemoteOpenAIServer
from
vllm.config
import
ModelConfig
from
vllm.config.utils
import
getattr_iter
from
vllm.v1.engine.detokenizer
import
check_stop_strings
from
...utils
import
RemoteOpenAIServer
MODEL_NAME
=
"Qwen/Qwen3-0.6B"
GEN_ENDPOINT
=
"/inference/v1/generate"
...
...
Prev
1
…
6
7
8
9
10
11
12
13
14
…
31
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment