Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cf069aa8
Unverified
Commit
cf069aa8
authored
Mar 03, 2025
by
Harry Mellor
Committed by
GitHub
Mar 02, 2025
Browse files
Update deprecated Python 3.8 typing (#13971)
parent
bf33700e
Changes
300
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
56 additions
and
73 deletions
+56
-73
tests/engine/test_executor.py
tests/engine/test_executor.py
+3
-3
tests/engine/test_multiproc_workers.py
tests/engine/test_multiproc_workers.py
+3
-3
tests/engine/test_stop_strings.py
tests/engine/test_stop_strings.py
+3
-3
tests/entrypoints/llm/test_chat.py
tests/entrypoints/llm/test_chat.py
+1
-3
tests/entrypoints/llm/test_encode.py
tests/entrypoints/llm/test_encode.py
+2
-3
tests/entrypoints/llm/test_generate.py
tests/entrypoints/llm/test_generate.py
+1
-2
tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
.../openai/correctness/test_transcription_api_correctness.py
+1
-2
tests/entrypoints/openai/reasoning_parsers/test_deepseekr1_reasoning_parser.py
...nai/reasoning_parsers/test_deepseekr1_reasoning_parser.py
+1
-3
tests/entrypoints/openai/reasoning_parsers/utils.py
tests/entrypoints/openai/reasoning_parsers/utils.py
+7
-7
tests/entrypoints/openai/test_audio.py
tests/entrypoints/openai/test_audio.py
+7
-9
tests/entrypoints/openai/test_basic.py
tests/entrypoints/openai/test_basic.py
+1
-2
tests/entrypoints/openai/test_chat.py
tests/entrypoints/openai/test_chat.py
+4
-4
tests/entrypoints/openai/test_completion.py
tests/entrypoints/openai/test_completion.py
+4
-4
tests/entrypoints/openai/test_embedding.py
tests/entrypoints/openai/test_embedding.py
+2
-2
tests/entrypoints/openai/test_pooling.py
tests/entrypoints/openai/test_pooling.py
+2
-2
tests/entrypoints/openai/test_root_path.py
tests/entrypoints/openai/test_root_path.py
+2
-2
tests/entrypoints/openai/test_video.py
tests/entrypoints/openai/test_video.py
+5
-7
tests/entrypoints/openai/test_vision.py
tests/entrypoints/openai/test_vision.py
+5
-7
tests/entrypoints/openai/test_vision_embedding.py
tests/entrypoints/openai/test_vision_embedding.py
+1
-3
tests/entrypoints/openai/tool_parsers/test_pythonic_tool_parser.py
...rypoints/openai/tool_parsers/test_pythonic_tool_parser.py
+1
-2
No files found.
tests/engine/test_executor.py
View file @
cf069aa8
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
import
asyncio
import
asyncio
import
os
import
os
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
,
Tuple
,
Union
from
typing
import
Any
,
Callable
,
Optional
,
Union
import
pytest
import
pytest
...
@@ -22,8 +22,8 @@ class CustomUniExecutor(UniProcExecutor):
...
@@ -22,8 +22,8 @@ class CustomUniExecutor(UniProcExecutor):
def
collective_rpc
(
self
,
def
collective_rpc
(
self
,
method
:
Union
[
str
,
Callable
],
method
:
Union
[
str
,
Callable
],
timeout
:
Optional
[
float
]
=
None
,
timeout
:
Optional
[
float
]
=
None
,
args
:
T
uple
=
(),
args
:
t
uple
=
(),
kwargs
:
Optional
[
D
ict
]
=
None
)
->
L
ist
[
Any
]:
kwargs
:
Optional
[
d
ict
]
=
None
)
->
l
ist
[
Any
]:
# Drop marker to show that this was ran
# Drop marker to show that this was ran
with
open
(
".marker"
,
"w"
):
with
open
(
".marker"
,
"w"
):
...
...
...
...
tests/engine/test_multiproc_workers.py
View file @
cf069aa8
...
@@ -4,7 +4,7 @@ import asyncio
...
@@ -4,7 +4,7 @@ import asyncio
from
concurrent.futures
import
ThreadPoolExecutor
from
concurrent.futures
import
ThreadPoolExecutor
from
functools
import
partial
from
functools
import
partial
from
time
import
sleep
from
time
import
sleep
from
typing
import
Any
,
List
,
Tuple
from
typing
import
Any
import
pytest
import
pytest
...
@@ -17,7 +17,7 @@ from vllm.worker.worker_base import WorkerWrapperBase
...
@@ -17,7 +17,7 @@ from vllm.worker.worker_base import WorkerWrapperBase
class
DummyWorkerWrapper
(
WorkerWrapperBase
):
class
DummyWorkerWrapper
(
WorkerWrapperBase
):
"""Dummy version of vllm.worker.worker.Worker"""
"""Dummy version of vllm.worker.worker.Worker"""
def
worker_method
(
self
,
worker_input
:
Any
)
->
T
uple
[
int
,
Any
]:
def
worker_method
(
self
,
worker_input
:
Any
)
->
t
uple
[
int
,
Any
]:
sleep
(
0.05
)
sleep
(
0.05
)
if
isinstance
(
worker_input
,
Exception
):
if
isinstance
(
worker_input
,
Exception
):
...
@@ -27,7 +27,7 @@ class DummyWorkerWrapper(WorkerWrapperBase):
...
@@ -27,7 +27,7 @@ class DummyWorkerWrapper(WorkerWrapperBase):
return
self
.
rpc_rank
,
input
return
self
.
rpc_rank
,
input
def
_start_workers
()
->
T
uple
[
L
ist
[
ProcessWorkerWrapper
],
WorkerMonitor
]:
def
_start_workers
()
->
t
uple
[
l
ist
[
ProcessWorkerWrapper
],
WorkerMonitor
]:
result_handler
=
ResultHandler
()
result_handler
=
ResultHandler
()
vllm_config
=
VllmConfig
()
vllm_config
=
VllmConfig
()
workers
=
[
workers
=
[
...
...
tests/engine/test_stop_strings.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Any
,
List
,
Optional
from
typing
import
Any
,
Optional
import
pytest
import
pytest
...
@@ -21,8 +21,8 @@ def vllm_model(vllm_runner):
...
@@ -21,8 +21,8 @@ def vllm_model(vllm_runner):
def
_test_stopping
(
llm_engine
:
LLMEngine
,
def
_test_stopping
(
llm_engine
:
LLMEngine
,
expected_output
:
str
,
expected_output
:
str
,
expected_reason
:
Any
,
expected_reason
:
Any
,
stop
:
Optional
[
L
ist
[
str
]]
=
None
,
stop
:
Optional
[
l
ist
[
str
]]
=
None
,
stop_token_ids
:
Optional
[
L
ist
[
int
]]
=
None
,
stop_token_ids
:
Optional
[
l
ist
[
int
]]
=
None
,
include_in_output
:
bool
=
False
,
include_in_output
:
bool
=
False
,
use_async_output_proc
:
bool
=
False
)
->
None
:
use_async_output_proc
:
bool
=
False
)
->
None
:
llm_engine
.
add_request
(
llm_engine
.
add_request
(
...
...
tests/entrypoints/llm/test_chat.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
import
pytest
import
pytest
from
vllm
import
LLM
from
vllm
import
LLM
...
@@ -63,7 +61,7 @@ def test_multi_chat():
...
@@ -63,7 +61,7 @@ def test_multi_chat():
@
pytest
.
mark
.
parametrize
(
"image_urls"
,
@
pytest
.
mark
.
parametrize
(
"image_urls"
,
[[
TEST_IMAGE_URLS
[
0
],
TEST_IMAGE_URLS
[
1
]]])
[[
TEST_IMAGE_URLS
[
0
],
TEST_IMAGE_URLS
[
1
]]])
def
test_chat_multi_image
(
image_urls
:
L
ist
[
str
]):
def
test_chat_multi_image
(
image_urls
:
l
ist
[
str
]):
llm
=
LLM
(
llm
=
LLM
(
model
=
"microsoft/Phi-3.5-vision-instruct"
,
model
=
"microsoft/Phi-3.5-vision-instruct"
,
dtype
=
"bfloat16"
,
dtype
=
"bfloat16"
,
...
...
tests/entrypoints/llm/test_encode.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
weakref
import
weakref
from
typing
import
List
import
pytest
import
pytest
...
@@ -45,8 +44,8 @@ def llm():
...
@@ -45,8 +44,8 @@ def llm():
cleanup_dist_env_and_memory
()
cleanup_dist_env_and_memory
()
def
assert_outputs_equal
(
o1
:
L
ist
[
PoolingRequestOutput
],
def
assert_outputs_equal
(
o1
:
l
ist
[
PoolingRequestOutput
],
o2
:
L
ist
[
PoolingRequestOutput
]):
o2
:
l
ist
[
PoolingRequestOutput
]):
assert
[
o
.
outputs
for
o
in
o1
]
==
[
o
.
outputs
for
o
in
o2
]
assert
[
o
.
outputs
for
o
in
o1
]
==
[
o
.
outputs
for
o
in
o2
]
...
...
tests/entrypoints/llm/test_generate.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
weakref
import
weakref
from
typing
import
List
import
pytest
import
pytest
...
@@ -43,7 +42,7 @@ def llm():
...
@@ -43,7 +42,7 @@ def llm():
cleanup_dist_env_and_memory
()
cleanup_dist_env_and_memory
()
def
assert_outputs_equal
(
o1
:
L
ist
[
RequestOutput
],
o2
:
L
ist
[
RequestOutput
]):
def
assert_outputs_equal
(
o1
:
l
ist
[
RequestOutput
],
o2
:
l
ist
[
RequestOutput
]):
assert
[
o
.
outputs
for
o
in
o1
]
==
[
o
.
outputs
for
o
in
o2
]
assert
[
o
.
outputs
for
o
in
o1
]
==
[
o
.
outputs
for
o
in
o2
]
...
...
tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
View file @
cf069aa8
...
@@ -10,7 +10,6 @@ import asyncio
...
@@ -10,7 +10,6 @@ import asyncio
import
io
import
io
import
time
import
time
from
statistics
import
mean
,
median
from
statistics
import
mean
,
median
from
typing
import
List
import
librosa
import
librosa
import
pytest
import
pytest
...
@@ -67,7 +66,7 @@ async def process_dataset(model, client, data, concurrent_request):
...
@@ -67,7 +66,7 @@ async def process_dataset(model, client, data, concurrent_request):
audio
,
sr
=
data
[
0
][
"audio"
][
"array"
],
data
[
0
][
"audio"
][
"sampling_rate"
]
audio
,
sr
=
data
[
0
][
"audio"
][
"array"
],
data
[
0
][
"audio"
][
"sampling_rate"
]
_
=
await
bound_transcribe
(
model
,
sem
,
client
,
(
audio
,
sr
),
""
)
_
=
await
bound_transcribe
(
model
,
sem
,
client
,
(
audio
,
sr
),
""
)
tasks
:
L
ist
[
asyncio
.
Task
]
=
[]
tasks
:
l
ist
[
asyncio
.
Task
]
=
[]
for
sample
in
data
:
for
sample
in
data
:
audio
,
sr
=
sample
[
"audio"
][
"array"
],
sample
[
"audio"
][
"sampling_rate"
]
audio
,
sr
=
sample
[
"audio"
][
"array"
],
sample
[
"audio"
][
"sampling_rate"
]
task
=
asyncio
.
create_task
(
task
=
asyncio
.
create_task
(
...
...
tests/entrypoints/openai/reasoning_parsers/test_deepseekr1_reasoning_parser.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
import
pytest
import
pytest
from
transformers
import
AutoTokenizer
from
transformers
import
AutoTokenizer
...
@@ -180,7 +178,7 @@ def test_reasoning(
...
@@ -180,7 +178,7 @@ def test_reasoning(
):
):
output
=
tokenizer
.
tokenize
(
param_dict
[
"output"
])
output
=
tokenizer
.
tokenize
(
param_dict
[
"output"
])
# decode everything to tokens
# decode everything to tokens
output_tokens
:
L
ist
[
str
]
=
[
output_tokens
:
l
ist
[
str
]
=
[
tokenizer
.
convert_tokens_to_string
([
token
])
for
token
in
output
tokenizer
.
convert_tokens_to_string
([
token
])
for
token
in
output
]
]
parser
:
ReasoningParser
=
ReasoningParserManager
.
get_reasoning_parser
(
parser
:
ReasoningParser
=
ReasoningParserManager
.
get_reasoning_parser
(
...
...
tests/entrypoints/openai/reasoning_parsers/utils.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
,
Tuple
,
Union
from
typing
import
Optional
,
Union
from
vllm.entrypoints.openai.protocol
import
(
ChatCompletionRequest
,
from
vllm.entrypoints.openai.protocol
import
(
ChatCompletionRequest
,
DeltaMessage
)
DeltaMessage
)
...
@@ -33,10 +33,10 @@ class StreamingReasoningReconstructor:
...
@@ -33,10 +33,10 @@ class StreamingReasoningReconstructor:
def
run_reasoning_extraction
(
def
run_reasoning_extraction
(
reasoning_parser
:
ReasoningParser
,
reasoning_parser
:
ReasoningParser
,
model_output
:
L
ist
[
str
],
model_output
:
l
ist
[
str
],
request
:
Union
[
ChatCompletionRequest
,
None
]
=
None
,
request
:
Union
[
ChatCompletionRequest
,
None
]
=
None
,
streaming
:
bool
=
False
,
streaming
:
bool
=
False
,
)
->
T
uple
[
Optional
[
str
],
Optional
[
str
]]:
)
->
t
uple
[
Optional
[
str
],
Optional
[
str
]]:
if
streaming
:
if
streaming
:
reconstructor
=
run_reasoning_extraction_streaming
(
reconstructor
=
run_reasoning_extraction_streaming
(
reasoning_parser
,
reasoning_parser
,
...
@@ -55,9 +55,9 @@ def run_reasoning_extraction(
...
@@ -55,9 +55,9 @@ def run_reasoning_extraction(
def
run_reasoning_extraction_nonstreaming
(
def
run_reasoning_extraction_nonstreaming
(
reasoning_parser
:
ReasoningParser
,
reasoning_parser
:
ReasoningParser
,
model_output
:
L
ist
[
str
],
model_output
:
l
ist
[
str
],
request
:
Union
[
ChatCompletionRequest
,
None
]
=
None
,
request
:
Union
[
ChatCompletionRequest
,
None
]
=
None
,
)
->
T
uple
[
Optional
[
str
],
Optional
[
str
]]:
)
->
t
uple
[
Optional
[
str
],
Optional
[
str
]]:
request
=
request
or
ChatCompletionRequest
(
messages
=
[],
model
=
"test-model"
)
request
=
request
or
ChatCompletionRequest
(
messages
=
[],
model
=
"test-model"
)
return
reasoning_parser
.
extract_reasoning_content
(
return
reasoning_parser
.
extract_reasoning_content
(
model_output
=
''
.
join
(
model_output
),
request
=
request
)
model_output
=
''
.
join
(
model_output
),
request
=
request
)
...
@@ -65,13 +65,13 @@ def run_reasoning_extraction_nonstreaming(
...
@@ -65,13 +65,13 @@ def run_reasoning_extraction_nonstreaming(
def
run_reasoning_extraction_streaming
(
def
run_reasoning_extraction_streaming
(
reasoning_parser
:
ReasoningParser
,
reasoning_parser
:
ReasoningParser
,
model_deltas
:
L
ist
[
str
],
model_deltas
:
l
ist
[
str
],
request
:
Union
[
ChatCompletionRequest
,
None
]
=
None
,
request
:
Union
[
ChatCompletionRequest
,
None
]
=
None
,
)
->
StreamingReasoningReconstructor
:
)
->
StreamingReasoningReconstructor
:
request
=
request
or
ChatCompletionRequest
(
messages
=
[],
model
=
"test-model"
)
request
=
request
or
ChatCompletionRequest
(
messages
=
[],
model
=
"test-model"
)
reconstructor
=
StreamingReasoningReconstructor
()
reconstructor
=
StreamingReasoningReconstructor
()
previous_text
=
""
previous_text
=
""
previous_tokens
:
L
ist
[
int
]
=
[]
previous_tokens
:
l
ist
[
int
]
=
[]
for
delta
in
model_deltas
:
for
delta
in
model_deltas
:
token_delta
=
[
token_delta
=
[
reasoning_parser
.
vocab
.
get
(
token
)
reasoning_parser
.
vocab
.
get
(
token
)
...
...
tests/entrypoints/openai/test_audio.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Dict
,
List
import
openai
import
openai
import
pytest
import
pytest
import
pytest_asyncio
import
pytest_asyncio
...
@@ -41,7 +39,7 @@ async def client(server):
...
@@ -41,7 +39,7 @@ async def client(server):
@
pytest
.
fixture
(
scope
=
"session"
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
base64_encoded_audio
()
->
D
ict
[
str
,
str
]:
def
base64_encoded_audio
()
->
d
ict
[
str
,
str
]:
return
{
return
{
audio_url
:
encode_audio_base64
(
*
fetch_audio
(
audio_url
))
audio_url
:
encode_audio_base64
(
*
fetch_audio
(
audio_url
))
for
audio_url
in
TEST_AUDIO_URLS
for
audio_url
in
TEST_AUDIO_URLS
...
@@ -107,7 +105,7 @@ async def test_single_chat_session_audio(client: openai.AsyncOpenAI,
...
@@ -107,7 +105,7 @@ async def test_single_chat_session_audio(client: openai.AsyncOpenAI,
@
pytest
.
mark
.
parametrize
(
"audio_url"
,
TEST_AUDIO_URLS
)
@
pytest
.
mark
.
parametrize
(
"audio_url"
,
TEST_AUDIO_URLS
)
async
def
test_single_chat_session_audio_base64encoded
(
async
def
test_single_chat_session_audio_base64encoded
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
audio_url
:
str
,
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
audio_url
:
str
,
base64_encoded_audio
:
D
ict
[
str
,
str
]):
base64_encoded_audio
:
d
ict
[
str
,
str
]):
messages
=
[{
messages
=
[{
"role"
:
"role"
:
...
@@ -165,7 +163,7 @@ async def test_single_chat_session_audio_base64encoded(
...
@@ -165,7 +163,7 @@ async def test_single_chat_session_audio_base64encoded(
@
pytest
.
mark
.
parametrize
(
"audio_url"
,
TEST_AUDIO_URLS
)
@
pytest
.
mark
.
parametrize
(
"audio_url"
,
TEST_AUDIO_URLS
)
async
def
test_single_chat_session_input_audio
(
async
def
test_single_chat_session_input_audio
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
audio_url
:
str
,
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
audio_url
:
str
,
base64_encoded_audio
:
D
ict
[
str
,
str
]):
base64_encoded_audio
:
d
ict
[
str
,
str
]):
messages
=
[{
messages
=
[{
"role"
:
"role"
:
"user"
,
"user"
,
...
@@ -255,7 +253,7 @@ async def test_chat_streaming_audio(client: openai.AsyncOpenAI,
...
@@ -255,7 +253,7 @@ async def test_chat_streaming_audio(client: openai.AsyncOpenAI,
temperature
=
0.0
,
temperature
=
0.0
,
stream
=
True
,
stream
=
True
,
)
)
chunks
:
L
ist
[
str
]
=
[]
chunks
:
l
ist
[
str
]
=
[]
finish_reason_count
=
0
finish_reason_count
=
0
async
for
chunk
in
stream
:
async
for
chunk
in
stream
:
delta
=
chunk
.
choices
[
0
].
delta
delta
=
chunk
.
choices
[
0
].
delta
...
@@ -277,7 +275,7 @@ async def test_chat_streaming_audio(client: openai.AsyncOpenAI,
...
@@ -277,7 +275,7 @@ async def test_chat_streaming_audio(client: openai.AsyncOpenAI,
@
pytest
.
mark
.
parametrize
(
"audio_url"
,
TEST_AUDIO_URLS
)
@
pytest
.
mark
.
parametrize
(
"audio_url"
,
TEST_AUDIO_URLS
)
async
def
test_chat_streaming_input_audio
(
client
:
openai
.
AsyncOpenAI
,
async
def
test_chat_streaming_input_audio
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
audio_url
:
str
,
model_name
:
str
,
audio_url
:
str
,
base64_encoded_audio
:
D
ict
[
str
,
base64_encoded_audio
:
d
ict
[
str
,
str
]):
str
]):
messages
=
[{
messages
=
[{
"role"
:
"role"
:
...
@@ -315,7 +313,7 @@ async def test_chat_streaming_input_audio(client: openai.AsyncOpenAI,
...
@@ -315,7 +313,7 @@ async def test_chat_streaming_input_audio(client: openai.AsyncOpenAI,
temperature
=
0.0
,
temperature
=
0.0
,
stream
=
True
,
stream
=
True
,
)
)
chunks
:
L
ist
[
str
]
=
[]
chunks
:
l
ist
[
str
]
=
[]
finish_reason_count
=
0
finish_reason_count
=
0
async
for
chunk
in
stream
:
async
for
chunk
in
stream
:
delta
=
chunk
.
choices
[
0
].
delta
delta
=
chunk
.
choices
[
0
].
delta
...
@@ -337,7 +335,7 @@ async def test_chat_streaming_input_audio(client: openai.AsyncOpenAI,
...
@@ -337,7 +335,7 @@ async def test_chat_streaming_input_audio(client: openai.AsyncOpenAI,
@
pytest
.
mark
.
parametrize
(
"audio_url"
,
TEST_AUDIO_URLS
)
@
pytest
.
mark
.
parametrize
(
"audio_url"
,
TEST_AUDIO_URLS
)
async
def
test_multi_audio_input
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
async
def
test_multi_audio_input
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
audio_url
:
str
,
audio_url
:
str
,
base64_encoded_audio
:
D
ict
[
str
,
str
]):
base64_encoded_audio
:
d
ict
[
str
,
str
]):
messages
=
[{
messages
=
[{
"role"
:
"role"
:
...
...
tests/entrypoints/openai/test_basic.py
View file @
cf069aa8
...
@@ -2,7 +2,6 @@
...
@@ -2,7 +2,6 @@
import
asyncio
import
asyncio
from
http
import
HTTPStatus
from
http
import
HTTPStatus
from
typing
import
List
import
openai
import
openai
import
pytest
import
pytest
...
@@ -17,7 +16,7 @@ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
...
@@ -17,7 +16,7 @@ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@
pytest
.
fixture
(
scope
=
'module'
)
@
pytest
.
fixture
(
scope
=
'module'
)
def
server_args
(
request
:
pytest
.
FixtureRequest
)
->
L
ist
[
str
]:
def
server_args
(
request
:
pytest
.
FixtureRequest
)
->
l
ist
[
str
]:
""" Provide extra arguments to the server via indirect parametrization
""" Provide extra arguments to the server via indirect parametrization
Usage:
Usage:
...
...
tests/entrypoints/openai/test_chat.py
View file @
cf069aa8
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
# imports for guided decoding tests
# imports for guided decoding tests
import
json
import
json
import
re
import
re
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Optional
import
jsonschema
import
jsonschema
import
openai
# use the official client for correctness check
import
openai
# use the official client for correctness check
...
@@ -190,7 +190,7 @@ async def test_too_many_chat_logprobs(client: openai.AsyncOpenAI,
...
@@ -190,7 +190,7 @@ async def test_too_many_chat_logprobs(client: openai.AsyncOpenAI,
async
def
test_prompt_logprobs_chat
(
client
:
openai
.
AsyncOpenAI
,
async
def
test_prompt_logprobs_chat
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
model_name
:
str
,
prompt_logprobs
:
Optional
[
int
]):
prompt_logprobs
:
Optional
[
int
]):
params
:
D
ict
=
{
params
:
d
ict
=
{
"messages"
:
[{
"messages"
:
[{
"role"
:
"system"
,
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
"content"
:
"You are a helpful assistant."
...
@@ -232,7 +232,7 @@ async def test_prompt_logprobs_chat(client: openai.AsyncOpenAI,
...
@@ -232,7 +232,7 @@ async def test_prompt_logprobs_chat(client: openai.AsyncOpenAI,
)
)
async
def
test_more_than_one_prompt_logprobs_chat
(
client
:
openai
.
AsyncOpenAI
,
async
def
test_more_than_one_prompt_logprobs_chat
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
model_name
:
str
):
params
:
D
ict
=
{
params
:
d
ict
=
{
"messages"
:
[{
"messages"
:
[{
"role"
:
"system"
,
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
"content"
:
"You are a helpful assistant."
...
@@ -343,7 +343,7 @@ async def test_chat_streaming(client: openai.AsyncOpenAI, model_name: str):
...
@@ -343,7 +343,7 @@ async def test_chat_streaming(client: openai.AsyncOpenAI, model_name: str):
temperature
=
0.0
,
temperature
=
0.0
,
stream
=
True
,
stream
=
True
,
)
)
chunks
:
L
ist
[
str
]
=
[]
chunks
:
l
ist
[
str
]
=
[]
finish_reason_count
=
0
finish_reason_count
=
0
async
for
chunk
in
stream
:
async
for
chunk
in
stream
:
delta
=
chunk
.
choices
[
0
].
delta
delta
=
chunk
.
choices
[
0
].
delta
...
...
tests/entrypoints/openai/test_completion.py
View file @
cf069aa8
...
@@ -5,7 +5,7 @@ import json
...
@@ -5,7 +5,7 @@ import json
import
re
import
re
import
shutil
import
shutil
from
tempfile
import
TemporaryDirectory
from
tempfile
import
TemporaryDirectory
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Optional
import
jsonschema
import
jsonschema
import
openai
# use the official client for correctness check
import
openai
# use the official client for correctness check
...
@@ -287,7 +287,7 @@ async def test_too_many_completion_logprobs(client: openai.AsyncOpenAI,
...
@@ -287,7 +287,7 @@ async def test_too_many_completion_logprobs(client: openai.AsyncOpenAI,
async
def
test_prompt_logprobs_completion
(
client
:
openai
.
AsyncOpenAI
,
async
def
test_prompt_logprobs_completion
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
model_name
:
str
,
prompt_logprobs
:
Optional
[
int
]):
prompt_logprobs
:
Optional
[
int
]):
params
:
D
ict
=
{
params
:
d
ict
=
{
"prompt"
:
[
"A robot may not injure another robot"
,
"My name is"
],
"prompt"
:
[
"A robot may not injure another robot"
,
"My name is"
],
"model"
:
model_name
,
"model"
:
model_name
,
}
}
...
@@ -331,7 +331,7 @@ async def test_completion_streaming(client: openai.AsyncOpenAI,
...
@@ -331,7 +331,7 @@ async def test_completion_streaming(client: openai.AsyncOpenAI,
max_tokens
=
5
,
max_tokens
=
5
,
temperature
=
0.0
,
temperature
=
0.0
,
stream
=
True
)
stream
=
True
)
chunks
:
L
ist
[
str
]
=
[]
chunks
:
l
ist
[
str
]
=
[]
finish_reason_count
=
0
finish_reason_count
=
0
async
for
chunk
in
stream
:
async
for
chunk
in
stream
:
chunks
.
append
(
chunk
.
choices
[
0
].
text
)
chunks
.
append
(
chunk
.
choices
[
0
].
text
)
...
@@ -364,7 +364,7 @@ async def test_parallel_streaming(client: openai.AsyncOpenAI, model_name: str):
...
@@ -364,7 +364,7 @@ async def test_parallel_streaming(client: openai.AsyncOpenAI, model_name: str):
max_tokens
=
max_tokens
,
max_tokens
=
max_tokens
,
n
=
n
,
n
=
n
,
stream
=
True
)
stream
=
True
)
chunks
:
L
ist
[
L
ist
[
str
]]
=
[[]
for
i
in
range
(
n
)]
chunks
:
l
ist
[
l
ist
[
str
]]
=
[[]
for
i
in
range
(
n
)]
finish_reason_count
=
0
finish_reason_count
=
0
async
for
chunk
in
stream
:
async
for
chunk
in
stream
:
index
=
chunk
.
choices
[
0
].
index
index
=
chunk
.
choices
[
0
].
index
...
...
tests/entrypoints/openai/test_embedding.py
View file @
cf069aa8
...
@@ -86,7 +86,7 @@ async def test_single_embedding(client: openai.AsyncOpenAI, model_name: str):
...
@@ -86,7 +86,7 @@ async def test_single_embedding(client: openai.AsyncOpenAI, model_name: str):
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
async
def
test_batch_embedding
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
async
def
test_batch_embedding
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
# test
L
ist[str]
# test
l
ist[str]
input_texts
=
[
input_texts
=
[
"The cat sat on the mat."
,
"A feline was resting on a rug."
,
"The cat sat on the mat."
,
"A feline was resting on a rug."
,
"Stars twinkle brightly in the night sky."
"Stars twinkle brightly in the night sky."
...
@@ -106,7 +106,7 @@ async def test_batch_embedding(client: openai.AsyncOpenAI, model_name: str):
...
@@ -106,7 +106,7 @@ async def test_batch_embedding(client: openai.AsyncOpenAI, model_name: str):
assert
embeddings
.
usage
.
prompt_tokens
==
33
assert
embeddings
.
usage
.
prompt_tokens
==
33
assert
embeddings
.
usage
.
total_tokens
==
33
assert
embeddings
.
usage
.
total_tokens
==
33
# test
L
ist[
L
ist[int]]
# test
l
ist[
l
ist[int]]
input_tokens
=
[[
4
,
5
,
7
,
9
,
20
],
[
15
,
29
,
499
],
[
24
,
24
,
24
,
24
,
24
],
input_tokens
=
[[
4
,
5
,
7
,
9
,
20
],
[
15
,
29
,
499
],
[
24
,
24
,
24
,
24
,
24
],
[
25
,
32
,
64
,
77
]]
[
25
,
32
,
64
,
77
]]
embedding_response
=
await
client
.
embeddings
.
create
(
embedding_response
=
await
client
.
embeddings
.
create
(
...
...
tests/entrypoints/openai/test_pooling.py
View file @
cf069aa8
...
@@ -84,7 +84,7 @@ async def test_single_pooling(server: RemoteOpenAIServer, model_name: str):
...
@@ -84,7 +84,7 @@ async def test_single_pooling(server: RemoteOpenAIServer, model_name: str):
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
async
def
test_batch_pooling
(
server
:
RemoteOpenAIServer
,
model_name
:
str
):
async
def
test_batch_pooling
(
server
:
RemoteOpenAIServer
,
model_name
:
str
):
# test
L
ist[str]
# test
l
ist[str]
input_texts
=
[
input_texts
=
[
"The cat sat on the mat."
,
"A feline was resting on a rug."
,
"The cat sat on the mat."
,
"A feline was resting on a rug."
,
"Stars twinkle brightly in the night sky."
"Stars twinkle brightly in the night sky."
...
@@ -107,7 +107,7 @@ async def test_batch_pooling(server: RemoteOpenAIServer, model_name: str):
...
@@ -107,7 +107,7 @@ async def test_batch_pooling(server: RemoteOpenAIServer, model_name: str):
assert
poolings
.
usage
.
prompt_tokens
==
25
assert
poolings
.
usage
.
prompt_tokens
==
25
assert
poolings
.
usage
.
total_tokens
==
25
assert
poolings
.
usage
.
total_tokens
==
25
# test
L
ist[
L
ist[int]]
# test
l
ist[
l
ist[int]]
input_tokens
=
[[
4
,
5
,
7
,
9
,
20
],
[
15
,
29
,
499
],
[
24
,
24
,
24
,
24
,
24
],
input_tokens
=
[[
4
,
5
,
7
,
9
,
20
],
[
15
,
29
,
499
],
[
24
,
24
,
24
,
24
,
24
],
[
25
,
32
,
64
,
77
]]
[
25
,
32
,
64
,
77
]]
response
=
requests
.
post
(
response
=
requests
.
post
(
...
...
tests/entrypoints/openai/test_root_path.py
View file @
cf069aa8
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
import
contextlib
import
contextlib
import
os
import
os
from
typing
import
Any
,
List
,
NamedTuple
from
typing
import
Any
,
NamedTuple
import
openai
# use the official client for correctness check
import
openai
# use the official client for correctness check
import
pytest
import
pytest
...
@@ -40,7 +40,7 @@ def server():
...
@@ -40,7 +40,7 @@ def server():
class
TestCase
(
NamedTuple
):
class
TestCase
(
NamedTuple
):
model_name
:
str
model_name
:
str
base_url
:
L
ist
[
str
]
base_url
:
l
ist
[
str
]
api_key
:
str
api_key
:
str
expected_error
:
Any
expected_error
:
Any
...
...
tests/entrypoints/openai/test_video.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Dict
,
List
import
openai
import
openai
import
pytest
import
pytest
import
pytest_asyncio
import
pytest_asyncio
...
@@ -49,7 +47,7 @@ async def client(server):
...
@@ -49,7 +47,7 @@ async def client(server):
@
pytest
.
fixture
(
scope
=
"session"
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
base64_encoded_video
()
->
D
ict
[
str
,
str
]:
def
base64_encoded_video
()
->
d
ict
[
str
,
str
]:
return
{
return
{
video_url
:
encode_video_base64
(
fetch_video
(
video_url
))
video_url
:
encode_video_base64
(
fetch_video
(
video_url
))
for
video_url
in
TEST_VIDEO_URLS
for
video_url
in
TEST_VIDEO_URLS
...
@@ -151,7 +149,7 @@ async def test_single_chat_session_video_beamsearch(client: openai.AsyncOpenAI,
...
@@ -151,7 +149,7 @@ async def test_single_chat_session_video_beamsearch(client: openai.AsyncOpenAI,
@
pytest
.
mark
.
parametrize
(
"video_url"
,
TEST_VIDEO_URLS
)
@
pytest
.
mark
.
parametrize
(
"video_url"
,
TEST_VIDEO_URLS
)
async
def
test_single_chat_session_video_base64encoded
(
async
def
test_single_chat_session_video_base64encoded
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
video_url
:
str
,
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
video_url
:
str
,
base64_encoded_video
:
D
ict
[
str
,
str
]):
base64_encoded_video
:
d
ict
[
str
,
str
]):
messages
=
[{
messages
=
[{
"role"
:
"role"
:
...
@@ -209,7 +207,7 @@ async def test_single_chat_session_video_base64encoded(
...
@@ -209,7 +207,7 @@ async def test_single_chat_session_video_base64encoded(
@
pytest
.
mark
.
parametrize
(
"video_url"
,
TEST_VIDEO_URLS
)
@
pytest
.
mark
.
parametrize
(
"video_url"
,
TEST_VIDEO_URLS
)
async
def
test_single_chat_session_video_base64encoded_beamsearch
(
async
def
test_single_chat_session_video_base64encoded_beamsearch
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
video_url
:
str
,
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
video_url
:
str
,
base64_encoded_video
:
D
ict
[
str
,
str
]):
base64_encoded_video
:
d
ict
[
str
,
str
]):
messages
=
[{
messages
=
[{
"role"
:
"role"
:
...
@@ -279,7 +277,7 @@ async def test_chat_streaming_video(client: openai.AsyncOpenAI,
...
@@ -279,7 +277,7 @@ async def test_chat_streaming_video(client: openai.AsyncOpenAI,
temperature
=
0.0
,
temperature
=
0.0
,
stream
=
True
,
stream
=
True
,
)
)
chunks
:
L
ist
[
str
]
=
[]
chunks
:
l
ist
[
str
]
=
[]
finish_reason_count
=
0
finish_reason_count
=
0
async
for
chunk
in
stream
:
async
for
chunk
in
stream
:
delta
=
chunk
.
choices
[
0
].
delta
delta
=
chunk
.
choices
[
0
].
delta
...
@@ -302,7 +300,7 @@ async def test_chat_streaming_video(client: openai.AsyncOpenAI,
...
@@ -302,7 +300,7 @@ async def test_chat_streaming_video(client: openai.AsyncOpenAI,
"video_urls"
,
"video_urls"
,
[
TEST_VIDEO_URLS
[:
i
]
for
i
in
range
(
2
,
len
(
TEST_VIDEO_URLS
))])
[
TEST_VIDEO_URLS
[:
i
]
for
i
in
range
(
2
,
len
(
TEST_VIDEO_URLS
))])
async
def
test_multi_video_input
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
async
def
test_multi_video_input
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
video_urls
:
L
ist
[
str
]):
video_urls
:
l
ist
[
str
]):
messages
=
[{
messages
=
[{
"role"
:
"role"
:
...
...
tests/entrypoints/openai/test_vision.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Dict
,
List
import
openai
import
openai
import
pytest
import
pytest
import
pytest_asyncio
import
pytest_asyncio
...
@@ -50,7 +48,7 @@ async def client(server):
...
@@ -50,7 +48,7 @@ async def client(server):
@
pytest
.
fixture
(
scope
=
"session"
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
base64_encoded_image
()
->
D
ict
[
str
,
str
]:
def
base64_encoded_image
()
->
d
ict
[
str
,
str
]:
return
{
return
{
image_url
:
encode_image_base64
(
fetch_image
(
image_url
))
image_url
:
encode_image_base64
(
fetch_image
(
image_url
))
for
image_url
in
TEST_IMAGE_URLS
for
image_url
in
TEST_IMAGE_URLS
...
@@ -152,7 +150,7 @@ async def test_single_chat_session_image_beamsearch(client: openai.AsyncOpenAI,
...
@@ -152,7 +150,7 @@ async def test_single_chat_session_image_beamsearch(client: openai.AsyncOpenAI,
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_URLS
)
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_URLS
)
async
def
test_single_chat_session_image_base64encoded
(
async
def
test_single_chat_session_image_base64encoded
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_url
:
str
,
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_url
:
str
,
base64_encoded_image
:
D
ict
[
str
,
str
]):
base64_encoded_image
:
d
ict
[
str
,
str
]):
messages
=
[{
messages
=
[{
"role"
:
"role"
:
...
@@ -210,7 +208,7 @@ async def test_single_chat_session_image_base64encoded(
...
@@ -210,7 +208,7 @@ async def test_single_chat_session_image_base64encoded(
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_URLS
)
@
pytest
.
mark
.
parametrize
(
"image_url"
,
TEST_IMAGE_URLS
)
async
def
test_single_chat_session_image_base64encoded_beamsearch
(
async
def
test_single_chat_session_image_base64encoded_beamsearch
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_url
:
str
,
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_url
:
str
,
base64_encoded_image
:
D
ict
[
str
,
str
]):
base64_encoded_image
:
d
ict
[
str
,
str
]):
messages
=
[{
messages
=
[{
"role"
:
"role"
:
...
@@ -280,7 +278,7 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
...
@@ -280,7 +278,7 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
temperature
=
0.0
,
temperature
=
0.0
,
stream
=
True
,
stream
=
True
,
)
)
chunks
:
L
ist
[
str
]
=
[]
chunks
:
l
ist
[
str
]
=
[]
finish_reason_count
=
0
finish_reason_count
=
0
async
for
chunk
in
stream
:
async
for
chunk
in
stream
:
delta
=
chunk
.
choices
[
0
].
delta
delta
=
chunk
.
choices
[
0
].
delta
...
@@ -303,7 +301,7 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
...
@@ -303,7 +301,7 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
"image_urls"
,
"image_urls"
,
[
TEST_IMAGE_URLS
[:
i
]
for
i
in
range
(
2
,
len
(
TEST_IMAGE_URLS
))])
[
TEST_IMAGE_URLS
[:
i
]
for
i
in
range
(
2
,
len
(
TEST_IMAGE_URLS
))])
async
def
test_multi_image_input
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
async
def
test_multi_image_input
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
,
image_urls
:
L
ist
[
str
]):
image_urls
:
l
ist
[
str
]):
messages
=
[{
messages
=
[{
"role"
:
"role"
:
...
...
tests/entrypoints/openai/test_vision_embedding.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Dict
import
pytest
import
pytest
import
requests
import
requests
...
@@ -49,7 +47,7 @@ def server():
...
@@ -49,7 +47,7 @@ def server():
@
pytest
.
fixture
(
scope
=
"session"
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
base64_encoded_image
()
->
D
ict
[
str
,
str
]:
def
base64_encoded_image
()
->
d
ict
[
str
,
str
]:
return
{
return
{
image_url
:
encode_image_base64
(
fetch_image
(
image_url
))
image_url
:
encode_image_base64
(
fetch_image
(
image_url
))
for
image_url
in
TEST_IMAGE_URLS
for
image_url
in
TEST_IMAGE_URLS
...
...
tests/entrypoints/openai/tool_parsers/test_pythonic_tool_parser.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
from
unittest.mock
import
MagicMock
from
unittest.mock
import
MagicMock
import
pytest
import
pytest
...
@@ -125,7 +124,7 @@ TEST_CASES = [
...
@@ -125,7 +124,7 @@ TEST_CASES = [
@
pytest
.
mark
.
parametrize
(
"streaming, model_output, expected_tool_calls"
,
@
pytest
.
mark
.
parametrize
(
"streaming, model_output, expected_tool_calls"
,
TEST_CASES
)
TEST_CASES
)
def
test_tool_call
(
streaming
:
bool
,
model_output
:
str
,
def
test_tool_call
(
streaming
:
bool
,
model_output
:
str
,
expected_tool_calls
:
L
ist
[
FunctionCall
]):
expected_tool_calls
:
l
ist
[
FunctionCall
]):
mock_tokenizer
=
MagicMock
()
mock_tokenizer
=
MagicMock
()
tool_parser
:
ToolParser
=
ToolParserManager
.
get_tool_parser
(
"pythonic"
)(
tool_parser
:
ToolParser
=
ToolParserManager
.
get_tool_parser
(
"pythonic"
)(
mock_tokenizer
)
mock_tokenizer
)
...
...
Prev
1
2
3
4
5
6
7
8
…
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment