Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
39178c7f
Unverified
Commit
39178c7f
authored
Aug 26, 2024
by
Nick Hill
Committed by
GitHub
Aug 26, 2024
Browse files
[Tests] Disable retries and use context manager for openai client (#7565)
parent
2eedede8
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
130 additions
and
93 deletions
+130
-93
tests/async_engine/test_openapi_server_ray.py
tests/async_engine/test_openapi_server_ray.py
+5
-3
tests/entrypoints/openai/test_audio.py
tests/entrypoints/openai/test_audio.py
+5
-3
tests/entrypoints/openai/test_basic.py
tests/entrypoints/openai/test_basic.py
+5
-3
tests/entrypoints/openai/test_chat.py
tests/entrypoints/openai/test_chat.py
+5
-3
tests/entrypoints/openai/test_completion.py
tests/entrypoints/openai/test_completion.py
+9
-2
tests/entrypoints/openai/test_embedding.py
tests/entrypoints/openai/test_embedding.py
+5
-4
tests/entrypoints/openai/test_encoder_decoder.py
tests/entrypoints/openai/test_encoder_decoder.py
+5
-3
tests/entrypoints/openai/test_metrics.py
tests/entrypoints/openai/test_metrics.py
+9
-2
tests/entrypoints/openai/test_models.py
tests/entrypoints/openai/test_models.py
+5
-3
tests/entrypoints/openai/test_return_tokens_as_ids.py
tests/entrypoints/openai/test_return_tokens_as_ids.py
+51
-47
tests/entrypoints/openai/test_shutdown.py
tests/entrypoints/openai/test_shutdown.py
+9
-8
tests/entrypoints/openai/test_tokenization.py
tests/entrypoints/openai/test_tokenization.py
+5
-3
tests/entrypoints/openai/test_vision.py
tests/entrypoints/openai/test_vision.py
+5
-3
tests/multi_step/test_correctness_async_llm.py
tests/multi_step/test_correctness_async_llm.py
+6
-6
tests/utils.py
tests/utils.py
+1
-0
No files found.
tests/async_engine/test_openapi_server_ray.py
View file @
39178c7f
import
openai
# use the official client for correctness check
import
openai
# use the official client for correctness check
import
pytest
import
pytest
import
pytest_asyncio
from
..utils
import
VLLM_PATH
,
RemoteOpenAIServer
from
..utils
import
VLLM_PATH
,
RemoteOpenAIServer
...
@@ -31,9 +32,10 @@ def server():
...
@@ -31,9 +32,10 @@ def server():
yield
remote_server
yield
remote_server
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest_asyncio
.
fixture
def
client
(
server
):
async
def
client
(
server
):
return
server
.
get_async_client
()
async
with
server
.
get_async_client
()
as
async_client
:
yield
async_client
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
...
...
tests/entrypoints/openai/test_audio.py
View file @
39178c7f
...
@@ -2,6 +2,7 @@ from typing import Dict, List
...
@@ -2,6 +2,7 @@ from typing import Dict, List
import
openai
import
openai
import
pytest
import
pytest
import
pytest_asyncio
from
vllm.assets.audio
import
AudioAsset
from
vllm.assets.audio
import
AudioAsset
from
vllm.multimodal.utils
import
encode_audio_base64
,
fetch_audio
from
vllm.multimodal.utils
import
encode_audio_base64
,
fetch_audio
...
@@ -28,9 +29,10 @@ def server():
...
@@ -28,9 +29,10 @@ def server():
yield
remote_server
yield
remote_server
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest_asyncio
.
fixture
def
client
(
server
):
async
def
client
(
server
):
return
server
.
get_async_client
()
async
with
server
.
get_async_client
()
as
async_client
:
yield
async_client
@
pytest
.
fixture
(
scope
=
"session"
)
@
pytest
.
fixture
(
scope
=
"session"
)
...
...
tests/entrypoints/openai/test_basic.py
View file @
39178c7f
...
@@ -2,6 +2,7 @@ from http import HTTPStatus
...
@@ -2,6 +2,7 @@ from http import HTTPStatus
import
openai
import
openai
import
pytest
import
pytest
import
pytest_asyncio
import
requests
import
requests
from
vllm.version
import
__version__
as
VLLM_VERSION
from
vllm.version
import
__version__
as
VLLM_VERSION
...
@@ -28,9 +29,10 @@ def server():
...
@@ -28,9 +29,10 @@ def server():
yield
remote_server
yield
remote_server
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest_asyncio
.
fixture
def
client
(
server
):
async
def
client
(
server
):
return
server
.
get_async_client
()
async
with
server
.
get_async_client
()
as
async_client
:
yield
async_client
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
...
...
tests/entrypoints/openai/test_chat.py
View file @
39178c7f
...
@@ -6,6 +6,7 @@ from typing import Dict, List, Optional
...
@@ -6,6 +6,7 @@ from typing import Dict, List, Optional
import
jsonschema
import
jsonschema
import
openai
# use the official client for correctness check
import
openai
# use the official client for correctness check
import
pytest
import
pytest
import
pytest_asyncio
import
torch
import
torch
from
openai
import
BadRequestError
from
openai
import
BadRequestError
...
@@ -46,9 +47,10 @@ def server(zephyr_lora_files, zephyr_lora_added_tokens_files): # noqa: F811
...
@@ -46,9 +47,10 @@ def server(zephyr_lora_files, zephyr_lora_added_tokens_files): # noqa: F811
yield
remote_server
yield
remote_server
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest_asyncio
.
fixture
def
client
(
server
):
async
def
client
(
server
):
return
server
.
get_async_client
()
async
with
server
.
get_async_client
()
as
async_client
:
yield
async_client
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
...
...
tests/entrypoints/openai/test_completion.py
View file @
39178c7f
...
@@ -8,6 +8,7 @@ from typing import Dict, List, Optional
...
@@ -8,6 +8,7 @@ from typing import Dict, List, Optional
import
jsonschema
import
jsonschema
import
openai
# use the official client for correctness check
import
openai
# use the official client for correctness check
import
pytest
import
pytest
import
pytest_asyncio
# downloading lora to test lora requests
# downloading lora to test lora requests
from
huggingface_hub
import
snapshot_download
from
huggingface_hub
import
snapshot_download
from
openai
import
BadRequestError
from
openai
import
BadRequestError
...
@@ -89,11 +90,17 @@ def default_server_args(zephyr_lora_files, zephyr_lora_added_tokens_files,
...
@@ -89,11 +90,17 @@ def default_server_args(zephyr_lora_files, zephyr_lora_added_tokens_files,
@
pytest
.
fixture
(
scope
=
"module"
,
@
pytest
.
fixture
(
scope
=
"module"
,
params
=
[
""
,
"--disable-frontend-multiprocessing"
])
params
=
[
""
,
"--disable-frontend-multiprocessing"
])
def
client
(
default_server_args
,
request
):
def
server
(
default_server_args
,
request
):
if
request
.
param
:
if
request
.
param
:
default_server_args
.
append
(
request
.
param
)
default_server_args
.
append
(
request
.
param
)
with
RemoteOpenAIServer
(
MODEL_NAME
,
default_server_args
)
as
remote_server
:
with
RemoteOpenAIServer
(
MODEL_NAME
,
default_server_args
)
as
remote_server
:
yield
remote_server
.
get_async_client
()
yield
remote_server
@
pytest_asyncio
.
fixture
async
def
client
(
server
):
async
with
server
.
get_async_client
()
as
async_client
:
yield
async_client
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
...
...
tests/entrypoints/openai/test_embedding.py
View file @
39178c7f
...
@@ -3,6 +3,7 @@ import base64
...
@@ -3,6 +3,7 @@ import base64
import
numpy
as
np
import
numpy
as
np
import
openai
import
openai
import
pytest
import
pytest
import
pytest_asyncio
from
...utils
import
RemoteOpenAIServer
from
...utils
import
RemoteOpenAIServer
...
@@ -24,10 +25,10 @@ def embedding_server():
...
@@ -24,10 +25,10 @@ def embedding_server():
yield
remote_server
yield
remote_server
@
pytest
.
mark
.
asyncio
@
pytest
_
asyncio
.
fixture
@
pytest
.
fixture
(
scope
=
"module"
)
async
def
embedding_client
(
embedding_server
):
def
embedding_client
(
embedding_server
)
:
async
with
embedding_server
.
get_async_client
()
as
async_client
:
return
embedding_server
.
get_
async_client
()
yield
async_client
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
...
...
tests/entrypoints/openai/test_encoder_decoder.py
View file @
39178c7f
import
openai
import
openai
import
pytest
import
pytest
import
pytest_asyncio
from
...utils
import
RemoteOpenAIServer
from
...utils
import
RemoteOpenAIServer
...
@@ -18,9 +19,10 @@ def server():
...
@@ -18,9 +19,10 @@ def server():
yield
remote_server
yield
remote_server
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest_asyncio
.
fixture
def
client
(
server
):
async
def
client
(
server
):
return
server
.
get_async_client
()
async
with
server
.
get_async_client
()
as
async_client
:
yield
async_client
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
...
...
tests/entrypoints/openai/test_metrics.py
View file @
39178c7f
...
@@ -6,6 +6,7 @@ from http import HTTPStatus
...
@@ -6,6 +6,7 @@ from http import HTTPStatus
import
openai
import
openai
import
pytest
import
pytest
import
pytest_asyncio
import
requests
import
requests
from
prometheus_client.parser
import
text_string_to_metric_families
from
prometheus_client.parser
import
text_string_to_metric_families
from
transformers
import
AutoTokenizer
from
transformers
import
AutoTokenizer
...
@@ -35,11 +36,17 @@ def default_server_args():
...
@@ -35,11 +36,17 @@ def default_server_args():
"--enable-chunked-prefill"
,
"--enable-chunked-prefill"
,
"--disable-frontend-multiprocessing"
,
"--disable-frontend-multiprocessing"
,
])
])
def
client
(
default_server_args
,
request
):
def
server
(
default_server_args
,
request
):
if
request
.
param
:
if
request
.
param
:
default_server_args
.
append
(
request
.
param
)
default_server_args
.
append
(
request
.
param
)
with
RemoteOpenAIServer
(
MODEL_NAME
,
default_server_args
)
as
remote_server
:
with
RemoteOpenAIServer
(
MODEL_NAME
,
default_server_args
)
as
remote_server
:
yield
remote_server
.
get_async_client
()
yield
remote_server
@
pytest_asyncio
.
fixture
async
def
client
(
server
):
async
with
server
.
get_async_client
()
as
cl
:
yield
cl
_PROMPT
=
"Hello my name is Robert and I love magic"
_PROMPT
=
"Hello my name is Robert and I love magic"
...
...
tests/entrypoints/openai/test_models.py
View file @
39178c7f
import
openai
# use the official client for correctness check
import
openai
# use the official client for correctness check
import
pytest
import
pytest
import
pytest_asyncio
# downloading lora to test lora requests
# downloading lora to test lora requests
from
huggingface_hub
import
snapshot_download
from
huggingface_hub
import
snapshot_download
...
@@ -43,9 +44,10 @@ def server(zephyr_lora_files):
...
@@ -43,9 +44,10 @@ def server(zephyr_lora_files):
yield
remote_server
yield
remote_server
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest_asyncio
.
fixture
def
client
(
server
):
async
def
client
(
server
):
return
server
.
get_async_client
()
async
with
server
.
get_async_client
()
as
async_client
:
yield
async_client
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
...
...
tests/entrypoints/openai/test_return_tokens_as_ids.py
View file @
39178c7f
...
@@ -25,59 +25,63 @@ def server_with_return_tokens_as_token_ids_flag(
...
@@ -25,59 +25,63 @@ def server_with_return_tokens_as_token_ids_flag(
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_completion_return_tokens_as_token_ids_completion
(
async
def
test_completion_return_tokens_as_token_ids_completion
(
server_with_return_tokens_as_token_ids_flag
):
server_with_return_tokens_as_token_ids_flag
):
client
=
server_with_return_tokens_as_token_ids_flag
.
get_async_client
()
async
with
server_with_return_tokens_as_token_ids_flag
.
get_async_client
(
)
as
client
:
completion
=
await
client
.
completions
.
create
(
completion
=
await
client
.
completions
.
create
(
model
=
MODEL_NAME
,
model
=
MODEL_NAME
,
# Include Unicode characters to test for dividing a single
# Include Unicode characters to test for dividing a single
# character across multiple tokens: 🎉 is [28705, 31862] for the
# character across multiple tokens: 🎉 is [28705, 31862] for the
# Zephyr tokenizer
# Zephyr tokenizer
prompt
=
"Say 'Hello, world! 🎉'"
,
prompt
=
"Say 'Hello, world! 🎉'"
,
echo
=
True
,
echo
=
True
,
temperature
=
0
,
temperature
=
0
,
max_tokens
=
10
,
max_tokens
=
10
,
logprobs
=
1
)
logprobs
=
1
)
text
=
completion
.
choices
[
0
].
text
text
=
completion
.
choices
[
0
].
text
token_strs
=
completion
.
choices
[
0
].
logprobs
.
tokens
token_strs
=
completion
.
choices
[
0
].
logprobs
.
tokens
tokenizer
=
get_tokenizer
(
tokenizer_name
=
MODEL_NAME
)
tokenizer
=
get_tokenizer
(
tokenizer_name
=
MODEL_NAME
)
# Check that the token representations are consistent between raw tokens
# Check that the token representations are consistent between raw
# and top_logprobs
# tokens and top_logprobs
# Slice off the first one, because there's no scoring associated with BOS
# Slice off the first one, because there's no scoring associated
top_logprobs
=
completion
.
choices
[
0
].
logprobs
.
top_logprobs
[
1
:]
# with BOS
top_logprob_keys
=
[
top_logprobs
=
completion
.
choices
[
0
].
logprobs
.
top_logprobs
[
1
:]
next
(
iter
(
logprob_by_tokens
))
for
logprob_by_tokens
in
top_logprobs
top_logprob_keys
=
[
]
next
(
iter
(
logprob_by_tokens
))
for
logprob_by_tokens
in
top_logprobs
assert
token_strs
[
1
:]
==
top_logprob_keys
]
assert
token_strs
[
1
:]
==
top_logprob_keys
# Check that decoding the tokens gives the expected text
# Check that decoding the tokens gives the expected text
tokens
=
[
int
(
token
.
removeprefix
(
"token_id:"
))
for
token
in
token_strs
]
tokens
=
[
int
(
token
.
removeprefix
(
"token_id:"
))
for
token
in
token_strs
]
assert
text
==
tokenizer
.
decode
(
tokens
,
skip_special_tokens
=
True
)
assert
text
==
tokenizer
.
decode
(
tokens
,
skip_special_tokens
=
True
)
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
async
def
test_chat_return_tokens_as_token_ids_completion
(
async
def
test_chat_return_tokens_as_token_ids_completion
(
server_with_return_tokens_as_token_ids_flag
):
server_with_return_tokens_as_token_ids_flag
):
client
=
server_with_return_tokens_as_token_ids_flag
.
get_async_client
()
async
with
server_with_return_tokens_as_token_ids_flag
.
get_async_client
(
response
=
await
client
.
chat
.
completions
.
create
(
)
as
client
:
model
=
MODEL_NAME
,
response
=
await
client
.
chat
.
completions
.
create
(
# Include Unicode characters to test for dividing a single
model
=
MODEL_NAME
,
# character across multiple tokens: 🎉 is [28705, 31862] for the
# Include Unicode characters to test for dividing a single
# Zephyr tokenizer
# character across multiple tokens: 🎉 is [28705, 31862] for the
messages
=
[{
# Zephyr tokenizer
"role"
:
"system"
,
messages
=
[{
"content"
:
"You like to respond in only emojis, like 🎉"
"role"
:
"system"
,
},
{
"content"
:
"You like to respond in only emojis, like 🎉"
"role"
:
"user"
,
},
{
"content"
:
"Please write some emojis: 🐱🐶🎉"
"role"
:
"user"
,
}],
"content"
:
"Please write some emojis: 🐱🐶🎉"
temperature
=
0
,
}],
max_tokens
=
8
,
temperature
=
0
,
logprobs
=
True
)
max_tokens
=
8
,
logprobs
=
True
)
text
=
response
.
choices
[
0
].
message
.
content
text
=
response
.
choices
[
0
].
message
.
content
tokenizer
=
get_tokenizer
(
tokenizer_name
=
MODEL_NAME
)
tokenizer
=
get_tokenizer
(
tokenizer_name
=
MODEL_NAME
)
token_ids
=
[]
token_ids
=
[]
for
logprob_content
in
response
.
choices
[
0
].
logprobs
.
content
:
for
logprob_content
in
response
.
choices
[
0
].
logprobs
.
content
:
token_ids
.
append
(
int
(
logprob_content
.
token
.
removeprefix
(
"token_id:"
)))
token_ids
.
append
(
assert
tokenizer
.
decode
(
token_ids
,
skip_special_tokens
=
True
)
==
text
int
(
logprob_content
.
token
.
removeprefix
(
"token_id:"
)))
assert
tokenizer
.
decode
(
token_ids
,
skip_special_tokens
=
True
)
==
text
tests/entrypoints/openai/test_shutdown.py
View file @
39178c7f
...
@@ -35,13 +35,14 @@ async def test_shutdown_on_engine_failure(tmp_path):
...
@@ -35,13 +35,14 @@ async def test_shutdown_on_engine_failure(tmp_path):
]
]
with
RemoteOpenAIServer
(
MODEL_NAME
,
args
)
as
remote_server
:
with
RemoteOpenAIServer
(
MODEL_NAME
,
args
)
as
remote_server
:
client
=
remote_server
.
get_async_client
()
async
with
remote_server
.
get_async_client
()
as
client
:
with
pytest
.
raises
(
openai
.
APIConnectionError
):
with
pytest
.
raises
(
# This crashes the engine
(
openai
.
APIConnectionError
,
openai
.
InternalServerError
)):
await
client
.
completions
.
create
(
model
=
"bad-adapter"
,
# This crashes the engine
prompt
=
"Hello, my name is"
)
await
client
.
completions
.
create
(
model
=
"bad-adapter"
,
prompt
=
"Hello, my name is"
)
# Now the server should shut down
# Now the server should shut down
return_code
=
remote_server
.
proc
.
wait
(
timeout
=
1
)
return_code
=
remote_server
.
proc
.
wait
(
timeout
=
3
)
assert
return_code
is
not
None
assert
return_code
is
not
None
tests/entrypoints/openai/test_tokenization.py
View file @
39178c7f
import
openai
# use the official client for correctness check
import
openai
# use the official client for correctness check
import
pytest
import
pytest
import
pytest_asyncio
import
requests
import
requests
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
...
@@ -42,9 +43,10 @@ def tokenizer_name(model_name: str,
...
@@ -42,9 +43,10 @@ def tokenizer_name(model_name: str,
model_name
==
"zephyr-lora2"
)
else
model_name
model_name
==
"zephyr-lora2"
)
else
model_name
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest_asyncio
.
fixture
def
client
(
server
):
async
def
client
(
server
):
return
server
.
get_async_client
()
async
with
server
.
get_async_client
()
as
async_client
:
yield
async_client
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
asyncio
...
...
tests/entrypoints/openai/test_vision.py
View file @
39178c7f
...
@@ -2,6 +2,7 @@ from typing import Dict, List
...
@@ -2,6 +2,7 @@ from typing import Dict, List
import
openai
import
openai
import
pytest
import
pytest
import
pytest_asyncio
from
vllm.multimodal.utils
import
encode_image_base64
,
fetch_image
from
vllm.multimodal.utils
import
encode_image_base64
,
fetch_image
...
@@ -36,9 +37,10 @@ def server():
...
@@ -36,9 +37,10 @@ def server():
yield
remote_server
yield
remote_server
@
pytest
.
fixture
(
scope
=
"module"
)
@
pytest_asyncio
.
fixture
def
client
(
server
):
async
def
client
(
server
):
return
server
.
get_async_client
()
async
with
server
.
get_async_client
()
as
async_client
:
yield
async_client
@
pytest
.
fixture
(
scope
=
"session"
)
@
pytest
.
fixture
(
scope
=
"session"
)
...
...
tests/multi_step/test_correctness_async_llm.py
View file @
39178c7f
...
@@ -28,12 +28,12 @@ async def completions_with_server_args(prompts: List[str], model_name: str,
...
@@ -28,12 +28,12 @@ async def completions_with_server_args(prompts: List[str], model_name: str,
outputs
=
None
outputs
=
None
with
RemoteOpenAIServer
(
model_name
,
server_cli_args
)
as
server
:
with
RemoteOpenAIServer
(
model_name
,
server_cli_args
)
as
server
:
client
=
server
.
get_async_client
()
async
with
server
.
get_async_client
()
as
client
:
outputs
=
await
client
.
completions
.
create
(
model
=
model_name
,
outputs
=
await
client
.
completions
.
create
(
model
=
model_name
,
prompt
=
prompts
,
prompt
=
prompts
,
temperature
=
0
,
temperature
=
0
,
stream
=
False
,
stream
=
False
,
max_tokens
=
5
)
max_tokens
=
5
)
assert
outputs
is
not
None
assert
outputs
is
not
None
return
outputs
return
outputs
...
...
tests/utils.py
View file @
39178c7f
...
@@ -154,6 +154,7 @@ class RemoteOpenAIServer:
...
@@ -154,6 +154,7 @@ class RemoteOpenAIServer:
return
openai
.
AsyncOpenAI
(
return
openai
.
AsyncOpenAI
(
base_url
=
self
.
url_for
(
"v1"
),
base_url
=
self
.
url_for
(
"v1"
),
api_key
=
self
.
DUMMY_API_KEY
,
api_key
=
self
.
DUMMY_API_KEY
,
max_retries
=
0
,
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment