Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
c29b98e0
Unverified
Commit
c29b98e0
authored
Nov 15, 2024
by
Lianmin Zheng
Committed by
GitHub
Nov 15, 2024
Browse files
Fix json benchmark (#2043)
parent
954f4e6b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
2 deletions
+11
-2
benchmark/json_schema/bench_sglang.py
benchmark/json_schema/bench_sglang.py
+10
-2
python/sglang/srt/server.py
python/sglang/srt/server.py
+1
-0
No files found.
benchmark/json_schema/bench_sglang.py
View file @
c29b98e0
...
@@ -7,6 +7,8 @@ import jsonschema
...
@@ -7,6 +7,8 @@ import jsonschema
from
datasets
import
load_dataset
from
datasets
import
load_dataset
import
sglang
as
sgl
import
sglang
as
sgl
from
sglang.global_config
import
global_config
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
add_common_sglang_args_and_parse
,
add_common_sglang_args_and_parse
,
select_sglang_backend
,
select_sglang_backend
,
...
@@ -103,7 +105,6 @@ def bench_schema(args):
...
@@ -103,7 +105,6 @@ def bench_schema(args):
print
(
e
)
print
(
e
)
indexs
.
append
(
i
)
indexs
.
append
(
i
)
assert
len
(
indexs
)
==
0
,
f
"Invalid json outputs:
{
indexs
}
"
return
states
,
latency
return
states
,
latency
...
@@ -111,11 +112,18 @@ def main(args):
...
@@ -111,11 +112,18 @@ def main(args):
states
,
latency
=
bench_schema
(
args
)
states
,
latency
=
bench_schema
(
args
)
# Compute accuracy
# Compute accuracy
tokenizer
=
get_tokenizer
(
global_config
.
default_backend
.
get_server_args
()[
"tokenizer_path"
]
)
output_jsons
=
[
state
[
"json_output"
]
for
state
in
states
]
num_output_tokens
=
sum
(
len
(
tokenizer
.
encode
(
x
))
for
x
in
output_jsons
)
print
(
f
"Latency:
{
latency
:.
3
f
}
"
)
print
(
f
"Latency:
{
latency
:.
3
f
}
"
)
print
(
f
"Output throughput:
{
num_output_tokens
/
latency
:.
3
f
}
token/s"
)
print
(
f
"#output tokens:
{
num_output_tokens
}
"
)
# Write results
# Write results
dump_state_text
(
f
"tmp_output_
{
args
.
backend
}
.txt"
,
states
)
dump_state_text
(
f
"tmp_output_
{
args
.
backend
}
.txt"
,
states
)
with
open
(
f
"
{
args
.
backend
}
.json"
,
"w"
)
as
fout
:
with
open
(
f
"
{
args
.
backend
}
.json
l
"
,
"w"
)
as
fout
:
for
state
in
states
:
for
state
in
states
:
fout
.
write
(
state
[
"json_output"
]
+
"
\n
"
)
fout
.
write
(
state
[
"json_output"
]
+
"
\n
"
)
...
...
python/sglang/srt/server.py
View file @
c29b98e0
...
@@ -139,6 +139,7 @@ async def get_model_info():
...
@@ -139,6 +139,7 @@ async def get_model_info():
"""Get the model information."""
"""Get the model information."""
result
=
{
result
=
{
"model_path"
:
tokenizer_manager
.
model_path
,
"model_path"
:
tokenizer_manager
.
model_path
,
"tokenizer_path"
:
tokenizer_manager
.
server_args
.
tokenizer_path
,
"is_generation"
:
tokenizer_manager
.
is_generation
,
"is_generation"
:
tokenizer_manager
.
is_generation
,
}
}
return
result
return
result
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment