Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
4f8c3aea
"git@developer.sourcefind.cn:OpenDAS/ollama.git" did not exist on "e0ed984cde1f6191e38ac2d7f4415ffd619a631f"
Unverified
Commit
4f8c3aea
authored
Nov 22, 2024
by
Yineng Zhang
Committed by
GitHub
Nov 22, 2024
Browse files
minor: update gsm8k threshold (#2125)
parent
2369e882
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
43 additions
and
33 deletions
+43
-33
.github/workflows/nightly-eval.yml
.github/workflows/nightly-eval.yml
+4
-4
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+16
-12
test/srt/test_nightly_gsm8k_eval.py
test/srt/test_nightly_gsm8k_eval.py
+23
-17
No files found.
.github/workflows/nightly-eval.yml
View file @
4f8c3aea
...
...
@@ -27,14 +27,14 @@ jobs:
bash scripts/ci_install_dependency.sh
pip install --upgrade "evalplus[vllm] @ git+https://github.com/evalplus/evalplus"
-
name
:
Test
human eval
-
name
:
Test
gsm8k
timeout-minutes
:
120
run
:
|
cd test/srt
python3 test_nightly_
human
_eval.py
python3 test_nightly_
gsm8k
_eval.py
-
name
:
Test
gsm8k
-
name
:
Test
human eval
timeout-minutes
:
120
run
:
|
cd test/srt
python3 test_nightly_
gsm8k
_eval.py
python3 test_nightly_
human
_eval.py
python/sglang/test/test_utils.py
View file @
4f8c3aea
...
...
@@ -439,18 +439,22 @@ def popen_launch_server(
process
=
subprocess
.
Popen
(
command
,
stdout
=
None
,
stderr
=
None
,
env
=
env
)
start_time
=
time
.
time
()
while
time
.
time
()
-
start_time
<
timeout
:
try
:
headers
=
{
"Content-Type"
:
"application/json; charset=utf-8"
,
"Authorization"
:
f
"Bearer
{
api_key
}
"
,
}
response
=
requests
.
get
(
f
"
{
base_url
}
/health_generate"
,
headers
=
headers
)
if
response
.
status_code
==
200
:
return
process
except
requests
.
RequestException
:
pass
time
.
sleep
(
10
)
with
requests
.
Session
()
as
session
:
while
time
.
time
()
-
start_time
<
timeout
:
try
:
headers
=
{
"Content-Type"
:
"application/json; charset=utf-8"
,
"Authorization"
:
f
"Bearer
{
api_key
}
"
,
}
response
=
session
.
get
(
f
"
{
base_url
}
/health_generate"
,
headers
=
headers
,
)
if
response
.
status_code
==
200
:
return
process
except
requests
.
RequestException
:
pass
time
.
sleep
(
10
)
raise
TimeoutError
(
"Server failed to start within the timeout period."
)
...
...
test/srt/test_nightly_gsm8k_eval.py
View file @
4f8c3aea
import
json
import
os
import
subprocess
import
unittest
import
warnings
from
datetime
import
datetime
from
types
import
SimpleNamespace
...
...
@@ -18,23 +20,23 @@ from sglang.test.test_utils import (
)
MODEL_SCORE_THRESHOLDS
=
{
"meta-llama/Llama-3.1-8B-Instruct"
:
0.83
16
,
"mistralai/Mistral-7B-Instruct-v0.3"
:
0.58
61
,
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
:
0.8
672
,
"google/gemma-2-27b-it"
:
0.92
27
,
"meta-llama/Llama-3.1-70B-Instruct"
:
0.96
23
,
"mistralai/Mixtral-8x7B-Instruct-v0.1"
:
0.64
15
,
"Qwen/Qwen2-57B-A14B-Instruct"
:
0.87
91
,
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8"
:
0.8
672
,
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8"
:
0.5
54
4
,
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
:
0.83
56
,
"neuralmagic/gemma-2-2b-it-FP8"
:
0.60
59
,
"neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8"
:
0.95
04
,
"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8"
:
0.61
38
,
"neuralmagic/Qwen2-72B-Instruct-FP8"
:
0.95
04
,
"neuralmagic/Qwen2-57B-A14B-Instruct-FP8"
:
0.8
197
,
"hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
:
0.8
395
,
"hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
:
0.84
35
,
"meta-llama/Llama-3.1-8B-Instruct"
:
0.83
,
"mistralai/Mistral-7B-Instruct-v0.3"
:
0.58
,
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
:
0.8
4
,
"google/gemma-2-27b-it"
:
0.92
,
"meta-llama/Llama-3.1-70B-Instruct"
:
0.96
,
"mistralai/Mixtral-8x7B-Instruct-v0.1"
:
0.64
,
"Qwen/Qwen2-57B-A14B-Instruct"
:
0.87
,
"neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8"
:
0.8
4
,
"neuralmagic/Mistral-7B-Instruct-v0.3-FP8"
:
0.54
,
"neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
:
0.83
,
"neuralmagic/gemma-2-2b-it-FP8"
:
0.60
,
"neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8"
:
0.95
,
"neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8"
:
0.61
,
"neuralmagic/Qwen2-72B-Instruct-FP8"
:
0.95
,
"neuralmagic/Qwen2-57B-A14B-Instruct-FP8"
:
0.8
2
,
"hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
:
0.8
4
,
"hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
:
0.84
,
}
...
...
@@ -65,6 +67,7 @@ def launch_server(base_url, model, is_fp8, is_tp2):
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
other_args
,
return_stdout_stderr
=
(
subprocess
.
DEVNULL
,
subprocess
.
DEVNULL
),
)
return
process
...
...
@@ -132,6 +135,9 @@ class TestEvalAccuracyLarge(unittest.TestCase):
kill_child_process
(
self
.
process
.
pid
,
include_self
=
True
)
def
test_mgsm_en_all_models
(
self
):
warnings
.
filterwarnings
(
"ignore"
,
category
=
ResourceWarning
,
message
=
"unclosed.*socket"
)
is_first
=
True
all_results
=
[]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment