Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
sunzhq2
llm-benchmarks
Commits
8b2e8ec0
Commit
8b2e8ec0
authored
Apr 16, 2026
by
sunzhq2
Browse files
init evalscope
parent
2a7c435f
Changes
63
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
959 additions
and
0 deletions
+959
-0
utils/evalscope-1/evaluator.py
utils/evalscope-1/evaluator.py
+678
-0
utils/evalscope-1/model_apis.py
utils/evalscope-1/model_apis.py
+85
-0
utils/evalscope-1/openai_compatible.py
utils/evalscope-1/openai_compatible.py
+196
-0
No files found.
utils/evalscope-1/evaluator.py
0 → 100644
View file @
8b2e8ec0
This diff is collapsed.
Click to expand it.
utils/evalscope-1/model_apis.py
0 → 100644
View file @
8b2e8ec0
from
evalscope.api.model
import
ModelAPI
from
evalscope.api.registry
import
register_model_api
from
evalscope.utils.deprecation_utils
import
deprecated
from
evalscope.utils.import_utils
import
check_import
@register_model_api(name='mock_llm')
def mockllm() -> type[ModelAPI]:
    """Resolve the ModelAPI class registered under 'mock_llm'."""
    from .mockllm import MockLLM as api_cls
    return api_cls
@register_model_api(name='openai_api')
def openai_api() -> type[ModelAPI]:
    """Resolve the OpenAI-compatible chat ModelAPI class."""
    from .openai_compatible import OpenAICompatibleAPI as api_cls
    return api_cls
@register_model_api(name='openai_raw_http')
def openai_raw_http() -> type[ModelAPI]:
    """Resolve the raw-HTTP variant of the OpenAI-compatible ModelAPI."""
    from .openai_compatible import OpenAICompatibleRawHTTP as api_cls
    return api_cls
@register_model_api(name='anthropic_api')
def anthropic_api() -> type[ModelAPI]:
    """Resolve the Anthropic-compatible ModelAPI class.

    Fails early with an informative error if the 'anthropic' SDK
    is not installed.
    """
    check_import(
        'anthropic',
        package='anthropic',
        raise_error=True,
        feature_name='anthropic_api',
    )
    from .anthropic_compatible import AnthropicCompatibleAPI as api_cls
    return api_cls
@register_model_api(name='server')
@deprecated(since='1.0.0', remove_in='1.1.0', alternative='openai_api')
def server() -> type[ModelAPI]:
    """Deprecated alias for 'openai_api'; resolves the same ModelAPI class."""
    from .openai_compatible import OpenAICompatibleAPI as api_cls
    return api_cls
@register_model_api(name='llm_ckpt')
def llm_ckpt() -> type[ModelAPI]:
    """Resolve the local-checkpoint ModelAPI class; requires torch."""
    check_import(
        'torch',
        package='torch',
        raise_error=True,
        feature_name='llm_ckpt',
    )
    from .modelscope import ModelScopeAPI as api_cls
    return api_cls
@register_model_api(name='checkpoint')
@deprecated(since='1.0.0', remove_in='1.1.0', alternative='llm_ckpt')
def checkpoint() -> type[ModelAPI]:
    """Deprecated alias for 'llm_ckpt'; resolves the same ModelAPI class."""
    check_import(
        'torch',
        package='torch',
        raise_error=True,
        feature_name='llm_ckpt',
    )
    from .modelscope import ModelScopeAPI as api_cls
    return api_cls
@register_model_api(name='text2image')
def text2image() -> type[ModelAPI]:
    """Resolve the text-to-image ModelAPI class.

    Requires the torch/torchvision/diffusers stack shipped with the
    'evalscope[aigc]' extras.
    """
    check_import(
        ['torch', 'torchvision', 'diffusers'],
        package='evalscope[aigc]',
        raise_error=True,
        feature_name='text2image',
    )
    from .text2image_model import Text2ImageAPI as api_cls
    return api_cls
@register_model_api(name='image_editing')
def image_editing() -> type[ModelAPI]:
    """Resolve the image-editing ModelAPI class.

    Requires the torch/torchvision/diffusers stack shipped with the
    'evalscope[aigc]' extras.
    """
    check_import(
        ['torch', 'torchvision', 'diffusers'],
        package='evalscope[aigc]',
        raise_error=True,
        feature_name='image_editing',
    )
    from .image_edit_model import ImageEditAPI as api_cls
    return api_cls
utils/evalscope-1/openai_compatible.py
0 → 100644
View file @
8b2e8ec0
import
os
from
time
import
perf_counter
from
openai
import
APIStatusError
,
BadRequestError
,
OpenAI
,
PermissionDeniedError
,
UnprocessableEntityError
from
openai._types
import
NOT_GIVEN
from
openai.types.chat
import
ChatCompletion
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Tuple
,
Union
from
evalscope.api.messages
import
ChatMessage
from
evalscope.api.model
import
ChatCompletionChoice
,
GenerateConfig
,
ModelAPI
,
ModelOutput
from
evalscope.api.tool
import
ToolChoice
,
ToolInfo
from
evalscope.utils
import
get_logger
from
evalscope.utils.argument_utils
import
get_supported_params
from
evalscope.utils.function_utils
import
retry_call
from
.utils.openai
import
(
chat_choices_from_openai
,
collect_stream_response
,
model_output_from_openai
,
openai_chat_messages
,
openai_chat_tool_choice
,
openai_chat_tools
,
openai_completion_params
,
openai_handle_bad_request
,
)
# Module-level logger shared by the OpenAI-compatible model API class below.
logger = get_logger()
class OpenAICompatibleAPI(ModelAPI):
    """ModelAPI backed by any OpenAI-compatible chat-completions endpoint.

    Credentials and endpoint fall back to the ``EVALSCOPE_API_KEY`` and
    ``EVALSCOPE_BASE_URL`` environment variables when not passed explicitly.
    Subclasses may customize behavior through the ``resolve_tools``,
    ``completion_params``, ``validate_request_params``, ``on_response``,
    ``chat_choices_from_completion`` and ``handle_bad_request`` hooks.
    """

    def __init__(
        self,
        model_name: str,
        base_url: Optional[str] = None,
        api_key: Optional[str] = None,
        config: Optional[GenerateConfig] = None,
        **model_args: Any,
    ) -> None:
        """Create the underlying OpenAI client.

        Args:
            model_name: Model identifier sent with every request.
            base_url: Endpoint root; falls back to ``EVALSCOPE_BASE_URL``.
            api_key: Credential; falls back to ``EVALSCOPE_API_KEY``.
            config: Default generation settings; a fresh ``GenerateConfig``
                is created when omitted.
            **model_args: Extra keyword arguments forwarded to ``OpenAI(...)``.

        Raises:
            AssertionError: If no api_key or base_url can be resolved.
        """
        # Fix: the original signature used `config: GenerateConfig = GenerateConfig()`,
        # a single shared mutable default instance — any mutation would leak
        # across every instance constructed without an explicit config.
        # Build a fresh GenerateConfig per call instead (backward-compatible).
        if config is None:
            config = GenerateConfig()
        super().__init__(
            model_name=model_name,
            base_url=base_url,
            api_key=api_key,
            config=config,
        )

        # use service prefix to lookup api_key
        self.api_key = api_key or os.environ.get('EVALSCOPE_API_KEY', None)
        # NOTE(review): asserts are stripped under `python -O`; kept as-is so
        # callers that expect AssertionError keep working.
        assert self.api_key, f'API key for {model_name} not found'

        # use service prefix to lookup base_url
        self.base_url = base_url or os.environ.get('EVALSCOPE_BASE_URL', None)
        assert self.base_url, f'Base URL for {model_name} not found'

        # Normalize: drop trailing slashes, then any '/chat/completions'
        # suffix, so callers may paste either the root URL or the full
        # endpoint URL.
        self.base_url = self.base_url.rstrip('/').removesuffix('/chat/completions')

        # create http client
        self.client = OpenAI(
            api_key=self.api_key,
            base_url=self.base_url,
            **model_args,
        )

    def generate(
        self,
        input: List[ChatMessage],
        tools: List[ToolInfo],
        tool_choice: ToolChoice,
        config: GenerateConfig,
    ) -> ModelOutput:
        """Run one chat completion and convert it to a ModelOutput.

        Handles both plain and streaming responses; for streams, records the
        time to first generated token as ``metadata['ttft']``.

        Raises:
            ValueError: Re-raised after logging when the model returns an
                invalid response.
        """
        # setup request and response for ModelCall
        request: Dict[str, Any] = {}
        response: Dict[str, Any] = {}

        # Give subclasses a chance to rewrite tools / tool_choice / config.
        tools, tool_choice, config = self.resolve_tools(tools, tool_choice, config)

        # get completion params (slice off service from model name)
        completion_params = self.completion_params(
            config=config,
            tools=len(tools) > 0,
        )

        # Tool fields are omitted entirely (NOT_GIVEN) when no tools are set,
        # since some servers reject empty tool lists.
        request = dict(
            messages=openai_chat_messages(input),
            tools=openai_chat_tools(tools) if len(tools) > 0 else NOT_GIVEN,
            tool_choice=openai_chat_tool_choice(tool_choice) if len(tools) > 0 else NOT_GIVEN,
            **completion_params,
        )
        self.validate_request_params(request)

        try:
            # generate completion and save response for model call
            request_start = perf_counter()
            completion = retry_call(
                self.client.chat.completions.create,
                retries=config.retries,
                sleep_interval=config.retry_interval,
                **request
            )

            # handle streaming response
            ttft = None
            is_stream_response = not isinstance(completion, ChatCompletion)
            if is_stream_response:
                collected_chunks = []
                for chunk in completion:
                    collected_chunks.append(chunk)
                    # TTFT should reflect first generated token/content chunk, not just any chunk.
                    # Different OpenAI-compatible servers may return delta as object or dict.
                    if ttft is None and self._chunk_has_generation_payload(chunk):
                        ttft = perf_counter() - request_start
                completion = collect_stream_response(collected_chunks)

            response = completion.model_dump()
            self.on_response(response)

            # return output and call
            choices = self.chat_choices_from_completion(completion, tools)
            model_output = model_output_from_openai(completion, choices)
            if ttft is not None:
                model_output.metadata = model_output.metadata or {}
                model_output.metadata['ttft'] = ttft
                model_output.metadata['ttft_source'] = 'first_content_stream_chunk'
            return model_output
        except (BadRequestError, UnprocessableEntityError, PermissionDeniedError) as ex:
            # Client-side request errors are mapped to a ModelOutput (or
            # re-raised) by the handle_bad_request hook.
            return self.handle_bad_request(ex)
        except ValueError as ex:
            logger.error(f'Model [{self.model_name}] returned an invalid response: {ex}')
            raise

    def resolve_tools(
        self, tools: List[ToolInfo], tool_choice: ToolChoice, config: GenerateConfig
    ) -> Tuple[List[ToolInfo], ToolChoice, GenerateConfig]:
        """Provides an opportunity for concrete classes to customize tool resolution."""
        return tools, tool_choice, config

    def completion_params(self, config: GenerateConfig, tools: bool) -> Dict[str, Any]:
        """Build the keyword arguments for chat.completions.create."""
        return openai_completion_params(
            model=self.model_name,
            config=config,
            tools=tools,
        )

    def validate_request_params(self, params: Dict[str, Any]):
        """Hook for subclasses to do custom request parameter validation.

        Default behavior: any key not accepted by the client's
        ``chat.completions.create`` signature is moved into ``extra_body``
        (mutates ``params`` in place).
        """
        # Cache supported params to avoid repeated calls to inspect.signature.
        if not hasattr(self, '_valid_params'):
            self._valid_params = get_supported_params(self.client.chat.completions.create)

        # Move unsupported parameters to extra_body.
        extra_body = params.get('extra_body', {})
        for key in list(params.keys()):
            if key not in self._valid_params:
                extra_body[key] = params.pop(key)
        if extra_body:
            params['extra_body'] = extra_body

    def on_response(self, response: Dict[str, Any]) -> None:
        """Hook for subclasses to do custom response handling."""
        pass

    def chat_choices_from_completion(
        self, completion: ChatCompletion, tools: List[ToolInfo]
    ) -> List[ChatCompletionChoice]:
        """Hook for subclasses to do custom chat choice processing."""
        return chat_choices_from_openai(completion, tools)

    def handle_bad_request(self, ex: APIStatusError) -> Union[ModelOutput, Exception]:
        """Hook for subclasses to do bad request handling"""
        return openai_handle_bad_request(self.model_name, ex)

    @staticmethod
    def _chunk_has_generation_payload(chunk: Any) -> bool:
        """Return True when stream chunk carries actual generated payload.

        A chunk counts as payload when any choice's delta has non-empty
        content, non-empty reasoning content, or tool calls. Supports both
        object-style and dict-style deltas.
        """
        choices = getattr(chunk, 'choices', None) or []
        for choice in choices:
            delta = getattr(choice, 'delta', None)
            if delta is None:
                continue
            if isinstance(delta, dict):
                content = delta.get('content')
                reasoning = delta.get('reasoning_content') or delta.get('reasoning')
                tool_calls = delta.get('tool_calls')
            else:
                content = getattr(delta, 'content', None)
                reasoning = getattr(delta, 'reasoning_content', None) or getattr(delta, 'reasoning', None)
                tool_calls = getattr(delta, 'tool_calls', None)
            if content not in (None, '', []):
                return True
            if reasoning not in (None, '', []):
                return True
            if tool_calls:
                return True
        return False
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment