Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
c126a6cc
Unverified
Commit
c126a6cc
authored
Jul 20, 2024
by
zhyncs
Committed by
GitHub
Jul 19, 2024
Browse files
feat: add benchmark serving (#657)
parent
ac971ff6
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
660 additions
and
0 deletions
+660
-0
python/sglang/bench.py
python/sglang/bench.py
+627
-0
python/sglang/srt/openai_protocol.py
python/sglang/srt/openai_protocol.py
+17
-0
python/sglang/srt/server.py
python/sglang/srt/server.py
+16
-0
No files found.
python/sglang/bench.py
0 → 100644
View file @
c126a6cc
This diff is collapsed.
Click to expand it.
python/sglang/srt/openai_protocol.py
View file @
c126a6cc
...
...
@@ -7,6 +7,23 @@ from pydantic import BaseModel, Field
from
typing_extensions
import
Literal
class
ModelCard
(
BaseModel
):
"""Model cards."""
id
:
str
object
:
str
=
"model"
created
:
int
=
Field
(
default_factory
=
lambda
:
int
(
time
.
time
()))
owned_by
:
str
=
"sglang"
root
:
Optional
[
str
]
=
None
class
ModelList
(
BaseModel
):
"""Model list consists of model cards."""
object
:
str
=
"list"
data
:
List
[
ModelCard
]
=
[]
class
ErrorResponse
(
BaseModel
):
object
:
str
=
"error"
message
:
str
...
...
python/sglang/srt/server.py
View file @
c126a6cc
...
...
@@ -44,6 +44,7 @@ from sglang.srt.openai_api_adapter import (
v1_chat_completions
,
v1_completions
,
)
from
sglang.srt.openai_protocol
import
ModelCard
,
ModelList
from
sglang.srt.server_args
import
PortArgs
,
ServerArgs
from
sglang.srt.utils
import
(
API_KEY_HEADER_NAME
,
...
...
@@ -73,6 +74,21 @@ async def health() -> Response:
return
Response
(
status_code
=
200
)
def
get_model_list
():
"""Available models."""
model_names
=
[
tokenizer_manager
.
model_path
]
return
model_names
@
app
.
get
(
"/v1/models"
)
def
available_models
():
"""Show available models."""
model_cards
=
[]
for
model_name
in
get_model_list
():
model_cards
.
append
(
ModelCard
(
id
=
model_name
,
root
=
model_name
))
return
ModelList
(
data
=
model_cards
)
@
app
.
get
(
"/get_model_info"
)
async
def
get_model_info
():
result
=
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment