sglang / Commits / 0c1c72a0

Commit 0c1c72a0 (unverified)
Fix accuracy test (#1051)
Authored Aug 12, 2024 by Lianmin Zheng, committed via GitHub on Aug 12, 2024
Parent: 41598e0d
Showing 4 changed files with 15 additions and 20 deletions (+15 / -20)
python/sglang/test/run_eval.py               +2  -1
python/sglang/test/simple_eval_humaneval.py  +2  -8
test/srt/test_eval_accuracy_large.py         +7  -7
test/srt/test_serving_throughput.py          +4  -4
python/sglang/test/run_eval.py

@@ -16,6 +16,8 @@ from sglang.test.simple_eval_common import (
 def run_eval(args):
+    set_ulimit()
+
     if "OPENAI_API_KEY" not in os.environ:
         os.environ["OPENAI_API_KEY"] = "EMPTY"
@@ -117,7 +119,6 @@ if __name__ == "__main__":
     parser.add_argument("--eval-name", type=str, default="mmlu")
     parser.add_argument("--num-examples", type=int)
     parser.add_argument("--num-threads", type=int, default=512)
-    set_ulimit()
     args = parser.parse_args()

     run_eval(args)
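Moving set_ulimit() into run_eval() means the file-descriptor limit is raised even when the function is imported and called directly rather than through the __main__ block. A minimal sketch of such a programmatic call, mirroring the argument names used by the test files below; the base_url and model values are placeholders, not part of this commit:

    from types import SimpleNamespace

    from sglang.test.run_eval import run_eval

    args = SimpleNamespace(
        base_url="http://127.0.0.1:30000",  # placeholder: point at a running server
        model="your-model-name",            # placeholder model identifier
        eval_name="mmlu",
        num_examples=64,
        num_threads=512,
    )
    metrics = run_eval(args)  # also sets OPENAI_API_KEY="EMPTY" if it is unset
    print(metrics["score"])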
python/sglang/test/simple_eval_humaneval.py

@@ -6,21 +6,15 @@ Mark Chen and Jerry Tworek and Heewoo Jun and Qiming Yuan and Henrique Ponde de
 https://arxiv.org/abs/2107.03374 https://github.com/openai/human-eval/
 """

-import json
-import logging
-import multiprocessing
 import random
 import re
-from collections import Counter, defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from io import BytesIO
-from typing import Any, Dict, List, Tuple
+from typing import Dict, List

-import blobfile as bf
 import tqdm

 try:
-    from human_eval.data import HUMAN_EVAL, read_problems
+    from human_eval.data import read_problems
     from human_eval.evaluation import estimate_pass_at_k
     from human_eval.execution import check_correctness  # , unsafe_execute
 except (ImportError, ModuleNotFoundError):
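The human_eval imports stay wrapped in a try/except so the module can be imported without the optional human-eval package installed. The except body is not part of this hunk; the fallback below is only a generic sketch of that pattern:

    try:
        from human_eval.data import read_problems
        from human_eval.evaluation import estimate_pass_at_k
        from human_eval.execution import check_correctness  # , unsafe_execute
    except (ImportError, ModuleNotFoundError):
        # Hypothetical fallback: defer the failure until the eval is actually run.
        read_problems = estimate_pass_at_k = check_correctness = None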
test/srt/test_eval_accuracy_large.py

@@ -32,12 +32,12 @@ class TestEvalAccuracyLarge(unittest.TestCase):
             base_url=self.base_url,
             model=self.model,
             eval_name="mmlu",
-            num_examples=None,
-            num_threads=2048,
+            num_examples=3000,
+            num_threads=1024,
         )

         metrics = run_eval(args)
-        assert metrics["score"] >= 0.70
+        assert metrics["score"] >= 0.71, f"{metrics}"

     def test_human_eval(self):
         args = SimpleNamespace(
@@ -45,11 +45,11 @@ class TestEvalAccuracyLarge(unittest.TestCase):
             model=self.model,
             eval_name="humaneval",
             num_examples=None,
-            num_threads=2048,
+            num_threads=1024,
         )

         metrics = run_eval(args)
-        assert metrics["score"] >= 0.65
+        assert metrics["score"] >= 0.65, f"{metrics}"

     def test_mgsm_en(self):
         args = SimpleNamespace(
@@ -57,11 +57,11 @@ class TestEvalAccuracyLarge(unittest.TestCase):
             model=self.model,
             eval_name="mgsm_en",
             num_examples=None,
-            num_threads=2048,
+            num_threads=1024,
         )

         metrics = run_eval(args)
-        assert metrics["score"] >= 0.85
+        assert metrics["score"] >= 0.85, f"{metrics}"


 if __name__ == "__main__":
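The f"{metrics}" message added to each assertion makes a failed accuracy check print the whole metrics dict instead of a bare AssertionError, which helps when triaging a CI failure. Illustrative values only:

    metrics = {"score": 0.69}  # hypothetical failing result
    assert metrics["score"] >= 0.71, f"{metrics}"
    # AssertionError: {'score': 0.69}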
test/srt/test_serving_throughput.py

@@ -66,8 +66,8 @@ class TestServingThroughput(unittest.TestCase):
         )

         if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
-            # A100 performance
-            assert res["output_throughput"] >= 1300
+            # A100 (PCIE) performance
+            assert res["output_throughput"] >= 1400

     def test_default_without_radix_cache(self):
         res = self.run_test(
@@ -77,8 +77,8 @@ class TestServingThroughput(unittest.TestCase):
         )

         if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
-            # A100 performance
-            assert res["output_throughput"] >= 1400
+            # A100 (PCIE) performance
+            assert res["output_throughput"] >= 1450

     def test_default_without_flashinfer(self):
         self.run_test(
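The throughput floors are only enforced when the SGLANG_IS_IN_CI environment variable is "true", so local runs on slower hardware are not failed by A100-calibrated thresholds. A small sketch of that gate with a made-up benchmark result:

    import os

    res = {"output_throughput": 1423.0}  # hypothetical result, tokens/s
    if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
        # A100 (PCIE) performance
        assert res["output_throughput"] >= 1400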