Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e9d32d07
"vllm/vscode:/vscode.git/clone" did not exist on "22fa2e35cbbb76fde94412f565572d290027a3c3"
Unverified
Commit
e9d32d07
authored
Jun 27, 2024
by
Cyrus Leung
Committed by
GitHub
Jun 27, 2024
Browse files
[CI/Build] [1/3] Reorganize entrypoints tests (#5526)
parent
2061f0b8
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
896 additions
and
774 deletions
+896
-774
tests/entrypoints/test_openai_chat.py
tests/entrypoints/test_openai_chat.py
+875
-0
tests/entrypoints/test_openai_server.py
tests/entrypoints/test_openai_server.py
+19
-772
tests/entrypoints/test_openai_vision.py
tests/entrypoints/test_openai_vision.py
+2
-2
No files found.
tests/entrypoints/test_openai_chat.py
0 → 100644
View file @
e9d32d07
# imports for guided decoding tests
import
json
import
re
from
typing
import
List
import
jsonschema
import
openai
# use the official client for correctness check
import
pytest
# using Ray for overall ease of process management, parallel requests,
# and debugging.
import
ray
import
torch
# downloading lora to test lora requests
from
huggingface_hub
import
snapshot_download
from
openai
import
BadRequestError
from
..utils
import
VLLM_PATH
,
RemoteOpenAIServer
# any model with a chat template should work here
MODEL_NAME
=
"HuggingFaceH4/zephyr-7b-beta"
# technically this needs Mistral-7B-v0.1 as base, but we're not testing
# generation quality here
LORA_NAME
=
"typeof/zephyr-7b-beta-lora"
TEST_SCHEMA
=
{
"type"
:
"object"
,
"properties"
:
{
"name"
:
{
"type"
:
"string"
},
"age"
:
{
"type"
:
"integer"
},
"skills"
:
{
"type"
:
"array"
,
"items"
:
{
"type"
:
"string"
,
"maxLength"
:
10
},
"minItems"
:
3
},
"work history"
:
{
"type"
:
"array"
,
"items"
:
{
"type"
:
"object"
,
"properties"
:
{
"company"
:
{
"type"
:
"string"
},
"duration"
:
{
"type"
:
"string"
},
"position"
:
{
"type"
:
"string"
}
},
"required"
:
[
"company"
,
"position"
]
}
}
},
"required"
:
[
"name"
,
"age"
,
"skills"
,
"work history"
]
}
TEST_REGEX
=
(
r
"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}"
r
"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)"
)
TEST_CHOICE
=
[
"Python"
,
"Java"
,
"JavaScript"
,
"C++"
,
"C#"
,
"PHP"
,
"TypeScript"
,
"Ruby"
,
"Swift"
,
"Kotlin"
]
pytestmark
=
pytest
.
mark
.
openai
@
pytest
.
fixture
(
scope
=
"session"
)
def
zephyr_lora_files
():
return
snapshot_download
(
repo_id
=
LORA_NAME
)
@
pytest
.
fixture
(
scope
=
"module"
)
def
ray_ctx
():
ray
.
init
(
runtime_env
=
{
"working_dir"
:
VLLM_PATH
})
yield
ray
.
shutdown
()
@
pytest
.
fixture
(
scope
=
"module"
)
def
server
(
zephyr_lora_files
,
ray_ctx
):
return
RemoteOpenAIServer
([
"--model"
,
MODEL_NAME
,
# use half precision for speed and memory savings in CI environment
"--dtype"
,
"bfloat16"
,
"--max-model-len"
,
"8192"
,
"--enforce-eager"
,
# lora config below
"--enable-lora"
,
"--lora-modules"
,
f
"zephyr-lora=
{
zephyr_lora_files
}
"
,
f
"zephyr-lora2=
{
zephyr_lora_files
}
"
,
"--max-lora-rank"
,
"64"
,
"--max-cpu-loras"
,
"2"
,
"--max-num-seqs"
,
"128"
,
])
@
pytest
.
fixture
(
scope
=
"module"
)
def
client
(
server
):
return
server
.
get_async_client
()
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
# first test base model, then test loras
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
,
"zephyr-lora2"
],
)
async
def
test_no_logprobs_chat
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
5
,
temperature
=
0.0
,
logprobs
=
False
)
choice
=
chat_completion
.
choices
[
0
]
assert
choice
.
logprobs
is
None
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
# just test 1 lora hereafter
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
],
)
async
def
test_zero_logprobs_chat
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
5
,
temperature
=
0.0
,
logprobs
=
True
,
top_logprobs
=
0
)
choice
=
chat_completion
.
choices
[
0
]
assert
choice
.
logprobs
is
not
None
assert
choice
.
logprobs
.
content
is
not
None
assert
len
(
choice
.
logprobs
.
content
[
0
].
top_logprobs
)
==
0
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
],
)
async
def
test_some_logprobs_chat
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
5
,
temperature
=
0.0
,
logprobs
=
True
,
top_logprobs
=
5
)
choice
=
chat_completion
.
choices
[
0
]
assert
choice
.
logprobs
is
not
None
assert
choice
.
logprobs
.
content
is
not
None
assert
len
(
choice
.
logprobs
.
content
[
0
].
top_logprobs
)
==
5
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
],
)
async
def
test_too_many_chat_logprobs
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
# Default max_logprobs is 20, so this should raise an error
with
pytest
.
raises
((
openai
.
BadRequestError
,
openai
.
APIError
)):
stream
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
logprobs
=
True
,
top_logprobs
=
21
,
stream
=
True
)
async
for
chunk
in
stream
:
...
with
pytest
.
raises
(
openai
.
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
logprobs
=
True
,
top_logprobs
=
30
,
stream
=
False
)
# the server should still work afterwards
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
stream
=
False
)
message
=
chat_completion
.
choices
[
0
].
message
assert
message
.
content
is
not
None
and
len
(
message
.
content
)
>=
0
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
],
)
async
def
test_single_chat_session
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
# test single completion
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
logprobs
=
True
,
top_logprobs
=
5
)
assert
chat_completion
.
id
is
not
None
assert
len
(
chat_completion
.
choices
)
==
1
choice
=
chat_completion
.
choices
[
0
]
assert
choice
.
finish_reason
==
"length"
assert
chat_completion
.
usage
==
openai
.
types
.
CompletionUsage
(
completion_tokens
=
10
,
prompt_tokens
=
37
,
total_tokens
=
47
)
message
=
choice
.
message
assert
message
.
content
is
not
None
and
len
(
message
.
content
)
>=
10
assert
message
.
role
==
"assistant"
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
message
.
content
})
# test multi-turn dialogue
messages
.
append
({
"role"
:
"user"
,
"content"
:
"express your result in json"
})
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
)
message
=
chat_completion
.
choices
[
0
].
message
assert
message
.
content
is
not
None
and
len
(
message
.
content
)
>=
0
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
# just test 1 lora hereafter
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
],
)
async
def
test_chat_streaming
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
# test single completion
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
)
output
=
chat_completion
.
choices
[
0
].
message
.
content
stop_reason
=
chat_completion
.
choices
[
0
].
finish_reason
# test streaming
stream
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
stream
=
True
,
)
chunks
:
List
[
str
]
=
[]
finish_reason_count
=
0
async
for
chunk
in
stream
:
delta
=
chunk
.
choices
[
0
].
delta
if
delta
.
role
:
assert
delta
.
role
==
"assistant"
if
delta
.
content
:
chunks
.
append
(
delta
.
content
)
if
chunk
.
choices
[
0
].
finish_reason
is
not
None
:
finish_reason_count
+=
1
# finish reason should only return in last block
assert
finish_reason_count
==
1
assert
chunk
.
choices
[
0
].
finish_reason
==
stop_reason
assert
delta
.
content
assert
""
.
join
(
chunks
)
==
output
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
"HuggingFaceH4/zephyr-7b-beta"
,
"zephyr-lora"
],
)
async
def
test_chat_completion_stream_options
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
},
{
"role"
:
"user"
,
"content"
:
"What is the capital of France?"
}]
# Test stream=True, stream_options={"include_usage": False}
stream
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
stream
=
True
,
stream_options
=
{
"include_usage"
:
False
})
async
for
chunk
in
stream
:
assert
chunk
.
usage
is
None
# Test stream=True, stream_options={"include_usage": True}
stream
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
stream
=
True
,
stream_options
=
{
"include_usage"
:
True
})
async
for
chunk
in
stream
:
if
chunk
.
choices
[
0
].
finish_reason
is
None
:
assert
chunk
.
usage
is
None
else
:
assert
chunk
.
usage
is
None
final_chunk
=
await
stream
.
__anext__
()
assert
final_chunk
.
usage
is
not
None
assert
final_chunk
.
usage
.
prompt_tokens
>
0
assert
final_chunk
.
usage
.
completion_tokens
>
0
assert
final_chunk
.
usage
.
total_tokens
==
(
final_chunk
.
usage
.
prompt_tokens
+
final_chunk
.
usage
.
completion_tokens
)
assert
final_chunk
.
choices
==
[]
# Test stream=False, stream_options={"include_usage": None}
with
pytest
.
raises
(
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
stream
=
False
,
stream_options
=
{
"include_usage"
:
None
})
# Test stream=False, stream_options={"include_usage": True}
with
pytest
.
raises
(
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
stream
=
False
,
stream_options
=
{
"include_usage"
:
True
})
# NOTE: Not sure why, but when I place this after `test_guided_regex_chat`
# (i.e. using the same ordering as in the Completions API tests), the test
# will fail on the second `guided_decoding_backend` even when I swap their order
# (ref: https://github.com/vllm-project/vllm/pull/5526#issuecomment-2173772256)
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_guided_choice_chat
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"The best language for type-safe systems programming is "
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
10
,
extra_body
=
dict
(
guided_choice
=
TEST_CHOICE
,
guided_decoding_backend
=
guided_decoding_backend
))
choice1
=
chat_completion
.
choices
[
0
].
message
.
content
assert
choice1
in
TEST_CHOICE
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
choice1
})
messages
.
append
({
"role"
:
"user"
,
"content"
:
"I disagree, pick another one"
})
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
10
,
extra_body
=
dict
(
guided_choice
=
TEST_CHOICE
,
guided_decoding_backend
=
guided_decoding_backend
))
choice2
=
chat_completion
.
choices
[
0
].
message
.
content
assert
choice2
in
TEST_CHOICE
assert
choice1
!=
choice2
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_guided_json_chat
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
f
"Give an example JSON for an employee profile that "
f
"fits this schema:
{
TEST_SCHEMA
}
"
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
extra_body
=
dict
(
guided_json
=
TEST_SCHEMA
,
guided_decoding_backend
=
guided_decoding_backend
))
message
=
chat_completion
.
choices
[
0
].
message
assert
message
.
content
is
not
None
json1
=
json
.
loads
(
message
.
content
)
jsonschema
.
validate
(
instance
=
json1
,
schema
=
TEST_SCHEMA
)
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
message
.
content
})
messages
.
append
({
"role"
:
"user"
,
"content"
:
"Give me another one with a different name and age"
})
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
extra_body
=
dict
(
guided_json
=
TEST_SCHEMA
,
guided_decoding_backend
=
guided_decoding_backend
))
message
=
chat_completion
.
choices
[
0
].
message
assert
message
.
content
is
not
None
json2
=
json
.
loads
(
message
.
content
)
jsonschema
.
validate
(
instance
=
json2
,
schema
=
TEST_SCHEMA
)
assert
json1
[
"name"
]
!=
json2
[
"name"
]
assert
json1
[
"age"
]
!=
json2
[
"age"
]
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_guided_regex_chat
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
f
"Give an example IP address with this regex:
{
TEST_REGEX
}
"
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
20
,
extra_body
=
dict
(
guided_regex
=
TEST_REGEX
,
guided_decoding_backend
=
guided_decoding_backend
))
ip1
=
chat_completion
.
choices
[
0
].
message
.
content
assert
ip1
is
not
None
assert
re
.
fullmatch
(
TEST_REGEX
,
ip1
)
is
not
None
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
ip1
})
messages
.
append
({
"role"
:
"user"
,
"content"
:
"Give me a different one"
})
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
20
,
extra_body
=
dict
(
guided_regex
=
TEST_REGEX
,
guided_decoding_backend
=
guided_decoding_backend
))
ip2
=
chat_completion
.
choices
[
0
].
message
.
content
assert
ip2
is
not
None
assert
re
.
fullmatch
(
TEST_REGEX
,
ip2
)
is
not
None
assert
ip1
!=
ip2
@
pytest
.
mark
.
asyncio
async
def
test_guided_decoding_type_error
(
client
:
openai
.
AsyncOpenAI
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"The best language for type-safe systems programming is "
}]
with
pytest
.
raises
(
openai
.
BadRequestError
):
_
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
extra_body
=
dict
(
guided_regex
=
{
1
:
"Python"
,
2
:
"C++"
}))
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_guided_choice_chat_logprobs
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"The best language for type-safe systems programming is "
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
10
,
logprobs
=
True
,
top_logprobs
=
5
,
extra_body
=
dict
(
guided_choice
=
TEST_CHOICE
,
guided_decoding_backend
=
guided_decoding_backend
))
assert
chat_completion
.
choices
[
0
].
logprobs
is
not
None
assert
chat_completion
.
choices
[
0
].
logprobs
.
content
is
not
None
top_logprobs
=
chat_completion
.
choices
[
0
].
logprobs
.
content
[
0
].
top_logprobs
# -9999.0 is the minimum logprob returned by OpenAI
for
item
in
top_logprobs
:
assert
item
.
logprob
>=
-
9999.0
,
f
"Failed (top_logprobs=
{
top_logprobs
}
)"
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_named_tool_use
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
f
"Give an example JSON for an employee profile that "
f
"fits this schema:
{
TEST_SCHEMA
}
"
}]
# non-streaming
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tools
=
[{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
,
"description"
:
"This is a dummy function"
,
"parameters"
:
TEST_SCHEMA
}
}],
tool_choice
=
{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
}
})
message
=
chat_completion
.
choices
[
0
].
message
assert
len
(
message
.
content
)
==
0
json_string
=
message
.
tool_calls
[
0
].
function
.
arguments
json1
=
json
.
loads
(
json_string
)
jsonschema
.
validate
(
instance
=
json1
,
schema
=
TEST_SCHEMA
)
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
json_string
})
messages
.
append
({
"role"
:
"user"
,
"content"
:
"Give me another one with a different name and age"
})
# streaming
stream
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tools
=
[{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
,
"description"
:
"This is a dummy function"
,
"parameters"
:
TEST_SCHEMA
}
}],
tool_choice
=
{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
}
},
stream
=
True
)
output
=
[]
finish_reason_count
=
0
async
for
chunk
in
stream
:
delta
=
chunk
.
choices
[
0
].
delta
if
delta
.
role
:
assert
delta
.
role
==
"assistant"
assert
delta
.
content
is
None
or
len
(
delta
.
content
)
==
0
if
delta
.
tool_calls
:
output
.
append
(
delta
.
tool_calls
[
0
].
function
.
arguments
)
if
chunk
.
choices
[
0
].
finish_reason
is
not
None
:
finish_reason_count
+=
1
# finish reason should only return in last block
assert
finish_reason_count
==
1
json2
=
json
.
loads
(
""
.
join
(
output
))
jsonschema
.
validate
(
instance
=
json2
,
schema
=
TEST_SCHEMA
)
assert
json1
[
"name"
]
!=
json2
[
"name"
]
assert
json1
[
"age"
]
!=
json2
[
"age"
]
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
])
async
def
test_required_tool_use_not_yet_supported
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
f
"Give an example JSON for an employee profile that "
f
"fits this schema:
{
TEST_SCHEMA
}
"
}]
with
pytest
.
raises
(
openai
.
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tools
=
[{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
,
"description"
:
"This is a dummy function"
,
"parameters"
:
TEST_SCHEMA
}
}],
tool_choice
=
"required"
)
with
pytest
.
raises
(
openai
.
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tools
=
[{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
,
"description"
:
"This is a dummy function"
,
"parameters"
:
TEST_SCHEMA
}
}],
tool_choice
=
"auto"
)
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
])
async
def
test_inconsistent_tool_choice_and_tools
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
f
"Give an example JSON for an employee profile that "
f
"fits this schema:
{
TEST_SCHEMA
}
"
}]
with
pytest
.
raises
(
openai
.
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tool_choice
=
{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
}
})
with
pytest
.
raises
(
openai
.
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tools
=
[{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
,
"description"
:
"This is a dummy function"
,
"parameters"
:
TEST_SCHEMA
}
}],
tool_choice
=
{
"type"
:
"function"
,
"function"
:
{
"name"
:
"nondefined_function_name"
}
})
@
pytest
.
mark
.
asyncio
async
def
test_response_format_json_object
(
client
:
openai
.
AsyncOpenAI
):
for
_
in
range
(
2
):
resp
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"user"
,
"content"
:
(
'what is 1+1? please respond with a JSON object, '
'the format is {"result": 2}'
)
}],
response_format
=
{
"type"
:
"json_object"
})
content
=
resp
.
choices
[
0
].
message
.
content
assert
content
is
not
None
loaded
=
json
.
loads
(
content
)
assert
loaded
==
{
"result"
:
2
},
loaded
@
pytest
.
mark
.
asyncio
async
def
test_extra_fields
(
client
:
openai
.
AsyncOpenAI
):
with
pytest
.
raises
(
BadRequestError
)
as
exc_info
:
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
,
"extra_field"
:
"0"
,
}],
# type: ignore
temperature
=
0
,
seed
=
0
)
assert
"extra_forbidden"
in
exc_info
.
value
.
message
@
pytest
.
mark
.
asyncio
async
def
test_complex_message_content
(
client
:
openai
.
AsyncOpenAI
):
resp
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"user"
,
"content"
:
[{
"type"
:
"text"
,
"text"
:
"what is 1+1? please provide the result without any other text."
}]
}],
temperature
=
0
,
seed
=
0
)
content
=
resp
.
choices
[
0
].
message
.
content
assert
content
==
"2"
@
pytest
.
mark
.
asyncio
async
def
test_custom_role
(
client
:
openai
.
AsyncOpenAI
):
# Not sure how the model handles custom roles so we just check that
# both string and complex message content are handled in the same way
resp1
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"my-custom-role"
,
"content"
:
"what is 1+1?"
,
}],
# type: ignore
temperature
=
0
,
seed
=
0
)
resp2
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"my-custom-role"
,
"content"
:
[{
"type"
:
"text"
,
"text"
:
"what is 1+1?"
}]
}],
# type: ignore
temperature
=
0
,
seed
=
0
)
content1
=
resp1
.
choices
[
0
].
message
.
content
content2
=
resp2
.
choices
[
0
].
message
.
content
assert
content1
==
content2
@
pytest
.
mark
.
asyncio
async
def
test_long_seed
(
client
:
openai
.
AsyncOpenAI
):
for
seed
in
[
torch
.
iinfo
(
torch
.
long
).
min
-
1
,
torch
.
iinfo
(
torch
.
long
).
max
+
1
]:
with
pytest
.
raises
(
BadRequestError
)
as
exc_info
:
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
,
}],
temperature
=
0
,
seed
=
seed
)
assert
(
"greater_than_equal"
in
exc_info
.
value
.
message
or
"less_than_equal"
in
exc_info
.
value
.
message
)
tests/entrypoints/test_openai_server.py
View file @
e9d32d07
...
...
@@ -10,7 +10,6 @@ import pytest
# and debugging.
import
ray
import
requests
import
torch
# downloading lora to test lora requests
from
huggingface_hub
import
snapshot_download
from
openai
import
BadRequestError
...
...
@@ -117,6 +116,7 @@ def client(server):
return
server
.
get_async_client
()
@
pytest
.
mark
.
asyncio
async
def
test_check_models
(
client
:
openai
.
AsyncOpenAI
):
models
=
await
client
.
models
.
list
()
models
=
models
.
data
...
...
@@ -266,174 +266,6 @@ async def test_too_many_completion_logprobs(client: openai.AsyncOpenAI,
assert
len
(
completion
.
choices
[
0
].
text
)
>=
0
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
# first test base model, then test loras
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
,
"zephyr-lora2"
],
)
async
def
test_no_logprobs_chat
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
5
,
temperature
=
0.0
,
logprobs
=
False
)
choice
=
chat_completion
.
choices
[
0
]
assert
choice
.
logprobs
is
None
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
# just test 1 lora hereafter
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
],
)
async
def
test_zero_logprobs_chat
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
5
,
temperature
=
0.0
,
logprobs
=
True
,
top_logprobs
=
0
)
choice
=
chat_completion
.
choices
[
0
]
assert
choice
.
logprobs
is
not
None
assert
choice
.
logprobs
.
content
is
not
None
assert
len
(
choice
.
logprobs
.
content
[
0
].
top_logprobs
)
==
0
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
],
)
async
def
test_some_logprobs_chat
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
5
,
temperature
=
0.0
,
logprobs
=
True
,
top_logprobs
=
5
)
choice
=
chat_completion
.
choices
[
0
]
assert
choice
.
logprobs
is
not
None
assert
choice
.
logprobs
.
content
is
not
None
assert
len
(
choice
.
logprobs
.
content
[
0
].
top_logprobs
)
==
5
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
],
)
async
def
test_too_many_chat_logprobs
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
# Default max_logprobs is 20, so this should raise an error
with
pytest
.
raises
((
openai
.
BadRequestError
,
openai
.
APIError
)):
stream
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
logprobs
=
True
,
top_logprobs
=
21
,
stream
=
True
)
async
for
chunk
in
stream
:
...
with
pytest
.
raises
(
openai
.
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
logprobs
=
True
,
top_logprobs
=
30
,
stream
=
False
)
# the server should still work afterwards
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
stream
=
False
)
message
=
chat_completion
.
choices
[
0
].
message
assert
message
.
content
is
not
None
and
len
(
message
.
content
)
>=
0
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
],
)
async
def
test_single_chat_session
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
# test single completion
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
logprobs
=
True
,
top_logprobs
=
5
)
assert
chat_completion
.
id
is
not
None
assert
len
(
chat_completion
.
choices
)
==
1
choice
=
chat_completion
.
choices
[
0
]
assert
choice
.
finish_reason
==
"length"
assert
chat_completion
.
usage
==
openai
.
types
.
CompletionUsage
(
completion_tokens
=
10
,
prompt_tokens
=
37
,
total_tokens
=
47
)
message
=
choice
.
message
assert
message
.
content
is
not
None
and
len
(
message
.
content
)
>=
10
assert
message
.
role
==
"assistant"
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
message
.
content
})
# test multi-turn dialogue
messages
.
append
({
"role"
:
"user"
,
"content"
:
"express your result in json"
})
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
)
message
=
chat_completion
.
choices
[
0
].
message
assert
message
.
content
is
not
None
and
len
(
message
.
content
)
>=
0
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
...
...
@@ -468,126 +300,6 @@ async def test_completion_streaming(client: openai.AsyncOpenAI,
assert
""
.
join
(
chunks
)
==
single_output
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
# just test 1 lora hereafter
"model_name"
,
[
MODEL_NAME
,
"zephyr-lora"
],
)
async
def
test_chat_streaming
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"what is 1+1?"
}]
# test single completion
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
)
output
=
chat_completion
.
choices
[
0
].
message
.
content
stop_reason
=
chat_completion
.
choices
[
0
].
finish_reason
# test streaming
stream
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
stream
=
True
,
)
chunks
:
List
[
str
]
=
[]
finish_reason_count
=
0
async
for
chunk
in
stream
:
delta
=
chunk
.
choices
[
0
].
delta
if
delta
.
role
:
assert
delta
.
role
==
"assistant"
if
delta
.
content
:
chunks
.
append
(
delta
.
content
)
if
chunk
.
choices
[
0
].
finish_reason
is
not
None
:
finish_reason_count
+=
1
# finish reason should only return in last block
assert
finish_reason_count
==
1
assert
chunk
.
choices
[
0
].
finish_reason
==
stop_reason
assert
delta
.
content
assert
""
.
join
(
chunks
)
==
output
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
"HuggingFaceH4/zephyr-7b-beta"
,
"zephyr-lora"
],
)
async
def
test_chat_completion_stream_options
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
},
{
"role"
:
"user"
,
"content"
:
"What is the capital of France?"
}]
# Test stream=True, stream_options={"include_usage": False}
stream
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
stream
=
True
,
stream_options
=
{
"include_usage"
:
False
})
async
for
chunk
in
stream
:
assert
chunk
.
usage
is
None
# Test stream=True, stream_options={"include_usage": True}
stream
=
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
stream
=
True
,
stream_options
=
{
"include_usage"
:
True
})
async
for
chunk
in
stream
:
if
chunk
.
choices
[
0
].
finish_reason
is
None
:
assert
chunk
.
usage
is
None
else
:
assert
chunk
.
usage
is
None
final_chunk
=
await
stream
.
__anext__
()
assert
final_chunk
.
usage
is
not
None
assert
final_chunk
.
usage
.
prompt_tokens
>
0
assert
final_chunk
.
usage
.
completion_tokens
>
0
assert
final_chunk
.
usage
.
total_tokens
==
(
final_chunk
.
usage
.
prompt_tokens
+
final_chunk
.
usage
.
completion_tokens
)
assert
final_chunk
.
choices
==
[]
# Test stream=False, stream_options={"include_usage": None}
with
pytest
.
raises
(
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
stream
=
False
,
stream_options
=
{
"include_usage"
:
None
})
# Test stream=False, stream_options={"include_usage": True}
with
pytest
.
raises
(
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
model_name
,
messages
=
messages
,
max_tokens
=
10
,
temperature
=
0.0
,
stream
=
False
,
stream_options
=
{
"include_usage"
:
True
})
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
...
...
@@ -773,53 +485,6 @@ async def test_guided_json_completion(client: openai.AsyncOpenAI,
jsonschema
.
validate
(
instance
=
output_json
,
schema
=
TEST_SCHEMA
)
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_guided_json_chat
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
f
"Give an example JSON for an employee profile that "
f
"fits this schema:
{
TEST_SCHEMA
}
"
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
extra_body
=
dict
(
guided_json
=
TEST_SCHEMA
,
guided_decoding_backend
=
guided_decoding_backend
))
message
=
chat_completion
.
choices
[
0
].
message
assert
message
.
content
is
not
None
json1
=
json
.
loads
(
message
.
content
)
jsonschema
.
validate
(
instance
=
json1
,
schema
=
TEST_SCHEMA
)
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
message
.
content
})
messages
.
append
({
"role"
:
"user"
,
"content"
:
"Give me another one with a different name and age"
})
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
extra_body
=
dict
(
guided_json
=
TEST_SCHEMA
,
guided_decoding_backend
=
guided_decoding_backend
))
message
=
chat_completion
.
choices
[
0
].
message
assert
message
.
content
is
not
None
json2
=
json
.
loads
(
message
.
content
)
jsonschema
.
validate
(
instance
=
json2
,
schema
=
TEST_SCHEMA
)
assert
json1
[
"name"
]
!=
json2
[
"name"
]
assert
json1
[
"age"
]
!=
json2
[
"age"
]
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
...
...
@@ -840,44 +505,6 @@ async def test_guided_regex_completion(client: openai.AsyncOpenAI,
assert
re
.
fullmatch
(
TEST_REGEX
,
completion
.
choices
[
i
].
text
)
is
not
None
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_guided_regex_chat
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
f
"Give an example IP address with this regex:
{
TEST_REGEX
}
"
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
20
,
extra_body
=
dict
(
guided_regex
=
TEST_REGEX
,
guided_decoding_backend
=
guided_decoding_backend
))
ip1
=
chat_completion
.
choices
[
0
].
message
.
content
assert
ip1
is
not
None
assert
re
.
fullmatch
(
TEST_REGEX
,
ip1
)
is
not
None
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
ip1
})
messages
.
append
({
"role"
:
"user"
,
"content"
:
"Give me a different one"
})
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
20
,
extra_body
=
dict
(
guided_regex
=
TEST_REGEX
,
guided_decoding_backend
=
guided_decoding_backend
))
ip2
=
chat_completion
.
choices
[
0
].
message
.
content
assert
ip2
is
not
None
assert
re
.
fullmatch
(
TEST_REGEX
,
ip2
)
is
not
None
assert
ip1
!=
ip2
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
...
...
@@ -898,385 +525,6 @@ async def test_guided_choice_completion(client: openai.AsyncOpenAI,
assert
completion
.
choices
[
i
].
text
in
TEST_CHOICE
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_guided_choice_chat
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"The best language for type-safe systems programming is "
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
10
,
extra_body
=
dict
(
guided_choice
=
TEST_CHOICE
,
guided_decoding_backend
=
guided_decoding_backend
))
choice1
=
chat_completion
.
choices
[
0
].
message
.
content
assert
choice1
in
TEST_CHOICE
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
choice1
})
messages
.
append
({
"role"
:
"user"
,
"content"
:
"I disagree, pick another one"
})
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
10
,
extra_body
=
dict
(
guided_choice
=
TEST_CHOICE
,
guided_decoding_backend
=
guided_decoding_backend
))
choice2
=
chat_completion
.
choices
[
0
].
message
.
content
assert
choice2
in
TEST_CHOICE
assert
choice1
!=
choice2
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_guided_decoding_type_error
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
with
pytest
.
raises
(
openai
.
BadRequestError
):
_
=
await
client
.
completions
.
create
(
model
=
MODEL_NAME
,
prompt
=
"Give an example JSON that fits this schema: 42"
,
extra_body
=
dict
(
guided_json
=
42
,
guided_decoding_backend
=
guided_decoding_backend
))
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"The best language for type-safe systems programming is "
}]
with
pytest
.
raises
(
openai
.
BadRequestError
):
_
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
extra_body
=
dict
(
guided_regex
=
{
1
:
"Python"
,
2
:
"C++"
}))
with
pytest
.
raises
(
openai
.
BadRequestError
):
_
=
await
client
.
completions
.
create
(
model
=
MODEL_NAME
,
prompt
=
"Give an example string that fits this regex"
,
extra_body
=
dict
(
guided_regex
=
TEST_REGEX
,
guided_json
=
TEST_SCHEMA
))
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_guided_choice_chat_logprobs
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"The best language for type-safe systems programming is "
}]
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
10
,
logprobs
=
True
,
top_logprobs
=
5
,
extra_body
=
dict
(
guided_choice
=
TEST_CHOICE
,
guided_decoding_backend
=
guided_decoding_backend
))
assert
chat_completion
.
choices
[
0
].
logprobs
is
not
None
assert
chat_completion
.
choices
[
0
].
logprobs
.
content
is
not
None
top_logprobs
=
chat_completion
.
choices
[
0
].
logprobs
.
content
[
0
].
top_logprobs
# -9999.0 is the minimum logprob returned by OpenAI
for
item
in
top_logprobs
:
assert
item
.
logprob
>=
-
9999.0
,
f
"Failed (top_logprobs=
{
top_logprobs
}
)"
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_named_tool_use
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
f
"Give an example JSON for an employee profile that "
f
"fits this schema:
{
TEST_SCHEMA
}
"
}]
# non-streaming
chat_completion
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tools
=
[{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
,
"description"
:
"This is a dummy function"
,
"parameters"
:
TEST_SCHEMA
}
}],
tool_choice
=
{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
}
})
message
=
chat_completion
.
choices
[
0
].
message
assert
len
(
message
.
content
)
==
0
json_string
=
message
.
tool_calls
[
0
].
function
.
arguments
json1
=
json
.
loads
(
json_string
)
jsonschema
.
validate
(
instance
=
json1
,
schema
=
TEST_SCHEMA
)
messages
.
append
({
"role"
:
"assistant"
,
"content"
:
json_string
})
messages
.
append
({
"role"
:
"user"
,
"content"
:
"Give me another one with a different name and age"
})
# streaming
stream
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tools
=
[{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
,
"description"
:
"This is a dummy function"
,
"parameters"
:
TEST_SCHEMA
}
}],
tool_choice
=
{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
}
},
stream
=
True
)
output
=
[]
finish_reason_count
=
0
async
for
chunk
in
stream
:
delta
=
chunk
.
choices
[
0
].
delta
if
delta
.
role
:
assert
delta
.
role
==
"assistant"
assert
delta
.
content
is
None
or
len
(
delta
.
content
)
==
0
if
delta
.
tool_calls
:
output
.
append
(
delta
.
tool_calls
[
0
].
function
.
arguments
)
if
chunk
.
choices
[
0
].
finish_reason
is
not
None
:
finish_reason_count
+=
1
# finish reason should only return in last block
assert
finish_reason_count
==
1
json2
=
json
.
loads
(
""
.
join
(
output
))
jsonschema
.
validate
(
instance
=
json2
,
schema
=
TEST_SCHEMA
)
assert
json1
[
"name"
]
!=
json2
[
"name"
]
assert
json1
[
"age"
]
!=
json2
[
"age"
]
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
])
async
def
test_required_tool_use_not_yet_supported
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
f
"Give an example JSON for an employee profile that "
f
"fits this schema:
{
TEST_SCHEMA
}
"
}]
with
pytest
.
raises
(
openai
.
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tools
=
[{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
,
"description"
:
"This is a dummy function"
,
"parameters"
:
TEST_SCHEMA
}
}],
tool_choice
=
"required"
)
with
pytest
.
raises
(
openai
.
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tools
=
[{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
,
"description"
:
"This is a dummy function"
,
"parameters"
:
TEST_SCHEMA
}
}],
tool_choice
=
"auto"
)
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
])
async
def
test_inconsistent_tool_choice_and_tools
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
messages
=
[{
"role"
:
"system"
,
"content"
:
"you are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
f
"Give an example JSON for an employee profile that "
f
"fits this schema:
{
TEST_SCHEMA
}
"
}]
with
pytest
.
raises
(
openai
.
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tool_choice
=
{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
}
})
with
pytest
.
raises
(
openai
.
BadRequestError
):
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
messages
,
max_tokens
=
1000
,
tools
=
[{
"type"
:
"function"
,
"function"
:
{
"name"
:
"dummy_function_name"
,
"description"
:
"This is a dummy function"
,
"parameters"
:
TEST_SCHEMA
}
}],
tool_choice
=
{
"type"
:
"function"
,
"function"
:
{
"name"
:
"nondefined_function_name"
}
})
@
pytest
.
mark
.
asyncio
async
def
test_response_format_json_object
(
client
:
openai
.
AsyncOpenAI
):
for
_
in
range
(
2
):
resp
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"user"
,
"content"
:
(
'what is 1+1? please respond with a JSON object, '
'the format is {"result": 2}'
)
}],
response_format
=
{
"type"
:
"json_object"
})
content
=
resp
.
choices
[
0
].
message
.
content
assert
content
is
not
None
loaded
=
json
.
loads
(
content
)
assert
loaded
==
{
"result"
:
2
},
loaded
@
pytest
.
mark
.
asyncio
async
def
test_extra_fields
(
client
:
openai
.
AsyncOpenAI
):
with
pytest
.
raises
(
BadRequestError
)
as
exc_info
:
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
,
"extra_field"
:
"0"
,
}],
# type: ignore
temperature
=
0
,
seed
=
0
)
assert
"extra_forbidden"
in
exc_info
.
value
.
message
@
pytest
.
mark
.
asyncio
async
def
test_complex_message_content
(
client
:
openai
.
AsyncOpenAI
):
resp
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"user"
,
"content"
:
[{
"type"
:
"text"
,
"text"
:
"what is 1+1? please provide the result without any other text."
}]
}],
temperature
=
0
,
seed
=
0
)
content
=
resp
.
choices
[
0
].
message
.
content
assert
content
==
"2"
@
pytest
.
mark
.
asyncio
async
def
test_custom_role
(
client
:
openai
.
AsyncOpenAI
):
# Not sure how the model handles custom roles so we just check that
# both string and complex message content are handled in the same way
resp1
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"my-custom-role"
,
"content"
:
"what is 1+1?"
,
}],
# type: ignore
temperature
=
0
,
seed
=
0
)
resp2
=
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"my-custom-role"
,
"content"
:
[{
"type"
:
"text"
,
"text"
:
"what is 1+1?"
}]
}],
# type: ignore
temperature
=
0
,
seed
=
0
)
content1
=
resp1
.
choices
[
0
].
message
.
content
content2
=
resp2
.
choices
[
0
].
message
.
content
assert
content1
==
content2
@
pytest
.
mark
.
asyncio
async
def
test_guided_grammar
(
client
:
openai
.
AsyncOpenAI
):
simple_sql_grammar
=
"""
...
...
@@ -1348,23 +596,22 @@ async def test_echo_logprob_completion(client: openai.AsyncOpenAI,
@
pytest
.
mark
.
asyncio
async
def
test_long_seed
(
client
:
openai
.
AsyncOpenAI
):
for
seed
in
[
torch
.
iinfo
(
torch
.
long
).
min
-
1
,
torch
.
iinfo
(
torch
.
long
).
max
+
1
]:
with
pytest
.
raises
(
BadRequestError
)
as
exc_info
:
await
client
.
chat
.
completions
.
create
(
model
=
MODEL_NAME
,
messages
=
[{
"role"
:
"system"
,
"content"
:
"You are a helpful assistant."
,
}],
temperature
=
0
,
seed
=
seed
)
assert
(
"greater_than_equal"
in
exc_info
.
value
.
message
or
"less_than_equal"
in
exc_info
.
value
.
message
)
@
pytest
.
mark
.
parametrize
(
"guided_decoding_backend"
,
[
"outlines"
,
"lm-format-enforcer"
])
async
def
test_guided_decoding_type_error
(
client
:
openai
.
AsyncOpenAI
,
guided_decoding_backend
:
str
):
with
pytest
.
raises
(
openai
.
BadRequestError
):
_
=
await
client
.
completions
.
create
(
model
=
MODEL_NAME
,
prompt
=
"Give an example JSON that fits this schema: 42"
,
extra_body
=
dict
(
guided_json
=
42
,
guided_decoding_backend
=
guided_decoding_backend
))
with
pytest
.
raises
(
openai
.
BadRequestError
):
_
=
await
client
.
completions
.
create
(
model
=
MODEL_NAME
,
prompt
=
"Give an example string that fits this regex"
,
extra_body
=
dict
(
guided_regex
=
TEST_REGEX
,
guided_json
=
TEST_SCHEMA
))
@
pytest
.
mark
.
asyncio
...
...
@@ -1372,7 +619,7 @@ async def test_long_seed(client: openai.AsyncOpenAI):
"model_name"
,
[
MODEL_NAME
],
)
async
def
test_tokenize
(
server
,
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
async
def
test_tokenize
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
base_url
=
str
(
client
.
base_url
)[:
-
3
]
tokenizer
=
get_tokenizer
(
tokenizer_name
=
MODEL_NAME
,
tokenizer_mode
=
"fast"
)
...
...
@@ -1399,7 +646,7 @@ async def test_tokenize(server, client: openai.AsyncOpenAI, model_name: str):
"model_name"
,
[
MODEL_NAME
],
)
async
def
test_detokenize
(
server
,
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
async
def
test_detokenize
(
client
:
openai
.
AsyncOpenAI
,
model_name
:
str
):
base_url
=
str
(
client
.
base_url
)[:
-
3
]
tokenizer
=
get_tokenizer
(
tokenizer_name
=
MODEL_NAME
,
tokenizer_mode
=
"fast"
)
...
...
tests/entrypoints/test_openai_vision.py
View file @
e9d32d07
from
pathlib
import
Path
from
typing
import
Dict
from
typing
import
Dict
,
List
import
openai
import
pytest
...
...
@@ -216,7 +216,7 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
temperature
=
0.0
,
stream
=
True
,
)
chunks
=
[]
chunks
:
List
[
str
]
=
[]
finish_reason_count
=
0
async
for
chunk
in
stream
:
delta
=
chunk
.
choices
[
0
].
delta
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment