zhaoyu6 / sglang · Commit 0c1c72a0
"include/ck/utility/utility.hpp" did not exist on "c82b833d8e76094a3702046d81872132d5c4b15a"
Fix accuracy test (#1051)

Unverified commit 0c1c72a0, authored Aug 12, 2024 by Lianmin Zheng, committed by GitHub on Aug 12, 2024.
Parent: 41598e0d
Changes: 4 changed files with 15 additions and 20 deletions (+15 -20).
python/sglang/test/run_eval.py               +2  -1
python/sglang/test/simple_eval_humaneval.py  +2  -8
test/srt/test_eval_accuracy_large.py         +7  -7
test/srt/test_serving_throughput.py          +4  -4
python/sglang/test/run_eval.py

@@ -16,6 +16,8 @@ from sglang.test.simple_eval_common import (
 
 
 def run_eval(args):
+    set_ulimit()
+
     if "OPENAI_API_KEY" not in os.environ:
         os.environ["OPENAI_API_KEY"] = "EMPTY"
@@ -117,7 +119,6 @@ if __name__ == "__main__":
     parser.add_argument("--eval-name", type=str, default="mmlu")
     parser.add_argument("--num-examples", type=int)
     parser.add_argument("--num-threads", type=int, default=512)
-    set_ulimit()
     args = parser.parse_args()
 
     run_eval(args)
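With this change, set_ulimit() runs inside run_eval() itself instead of only in the __main__ block, so importing callers (such as the CI tests below) also get the raised file-descriptor limit before spawning many worker threads. The helper's body is not part of this diff; the following is only a sketch of what such a function typically does, assuming it bumps the soft RLIMIT_NOFILE limit via the standard resource module (the real helper imported by run_eval.py may differ):

    import resource

    def set_ulimit(target_soft_limit=65535):
        # Sketch only: raise the soft open-file limit so a highly concurrent
        # eval (e.g. num_threads=1024) does not run out of sockets.
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        if soft < target_soft_limit:
            try:
                resource.setrlimit(resource.RLIMIT_NOFILE, (target_soft_limit, hard))
            except ValueError:
                # Keep the current limit if the OS refuses the increase.
                pass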
python/sglang/test/simple_eval_humaneval.py

@@ -6,21 +6,15 @@ Mark Chen and Jerry Tworek and Heewoo Jun and Qiming Yuan and Henrique Ponde de
 https://arxiv.org/abs/2107.03374 https://github.com/openai/human-eval/
 """
 
-import json
-import logging
-import multiprocessing
 import random
 import re
-from collections import Counter, defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from io import BytesIO
-from typing import Any, Dict, List, Tuple
+from typing import Dict, List
 
 import blobfile as bf
 import tqdm
 
 try:
-    from human_eval.data import HUMAN_EVAL, read_problems
+    from human_eval.data import read_problems
     from human_eval.evaluation import estimate_pass_at_k
     from human_eval.execution import check_correctness  # , unsafe_execute
 except (ImportError, ModuleNotFoundError):
test/srt/test_eval_accuracy_large.py

@@ -32,12 +32,12 @@ class TestEvalAccuracyLarge(unittest.TestCase):
             base_url=self.base_url,
             model=self.model,
             eval_name="mmlu",
-            num_examples=None,
-            num_threads=2048,
+            num_examples=3000,
+            num_threads=1024,
         )
 
         metrics = run_eval(args)
-        assert metrics["score"] >= 0.70
+        assert metrics["score"] >= 0.71, f"{metrics}"
 
     def test_human_eval(self):
         args = SimpleNamespace(
@@ -45,11 +45,11 @@ class TestEvalAccuracyLarge(unittest.TestCase):
             model=self.model,
             eval_name="humaneval",
             num_examples=None,
-            num_threads=2048,
+            num_threads=1024,
         )
 
         metrics = run_eval(args)
-        assert metrics["score"] >= 0.65
+        assert metrics["score"] >= 0.65, f"{metrics}"
 
     def test_mgsm_en(self):
         args = SimpleNamespace(
@@ -57,11 +57,11 @@ class TestEvalAccuracyLarge(unittest.TestCase):
             model=self.model,
             eval_name="mgsm_en",
             num_examples=None,
-            num_threads=2048,
+            num_threads=1024,
         )
 
         metrics = run_eval(args)
-        assert metrics["score"] >= 0.85
+        assert metrics["score"] >= 0.85, f"{metrics}"
 
 
 if __name__ == "__main__":
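Taken together, the MMLU test now samples 3000 examples with 1024 threads and requires a score of at least 0.71, and every assertion now prints the full metrics dict on failure. For reference, the same check can be reproduced outside unittest by building the argument namespace by hand. This is only a sketch, not part of the commit: the base_url and model values are placeholders, and it assumes run_eval needs no fields beyond the ones the test sets.

    from types import SimpleNamespace

    from sglang.test.run_eval import run_eval

    # Placeholder endpoint and model name; the real test pulls these from
    # its unittest fixtures (self.base_url / self.model).
    args = SimpleNamespace(
        base_url="http://127.0.0.1:30000",
        model="default",
        eval_name="mmlu",
        num_examples=3000,
        num_threads=1024,
    )

    metrics = run_eval(args)
    assert metrics["score"] >= 0.71, f"{metrics}"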
test/srt/test_serving_throughput.py

@@ -66,8 +66,8 @@ class TestServingThroughput(unittest.TestCase):
         )
 
         if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
-            # A100 performance
-            assert res["output_throughput"] >= 1300
+            # A100 (PCIE) performance
+            assert res["output_throughput"] >= 1400
 
     def test_default_without_radix_cache(self):
         res = self.run_test(
@@ -77,8 +77,8 @@ class TestServingThroughput(unittest.TestCase):
         )
 
         if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
-            # A100 performance
-            assert res["output_throughput"] >= 1400
+            # A100 (PCIE) performance
+            assert res["output_throughput"] >= 1450
 
     def test_default_without_flashinfer(self):
         self.run_test(
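The throughput floors (raised here from 1300 to 1400 and from 1400 to 1450) are only enforced when the suite runs inside sglang's CI, as signalled by the SGLANG_IS_IN_CI environment variable. Below is a small hypothetical helper that captures the same gate, assuming output_throughput is reported in tokens per second; the helper name is illustrative and not part of the codebase.

    import os

    def assert_min_throughput(res, floor):
        # Hypothetical helper mirroring the CI-only gate in these tests:
        # skip the check locally, enforce the throughput floor in CI.
        if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
            assert res["output_throughput"] >= floor, res

    # Thresholds after this commit (tokens/s, assumed unit).
    assert_min_throughput({"output_throughput": 1460.0}, 1400)  # default config
    assert_min_throughput({"output_throughput": 1460.0}, 1450)  # without radix cache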