Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
d4fc1a70
Unverified
Commit
d4fc1a70
authored
Nov 28, 2024
by
Lianmin Zheng
Committed by
GitHub
Nov 28, 2024
Browse files
Crash the server correctly during error (#2231)
parent
db674e3d
Changes
46
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
42 additions
and
42 deletions
+42
-42
test/srt/test_eval_accuracy_large_chunked_prefill.py
test/srt/test_eval_accuracy_large_chunked_prefill.py
+2
-2
test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py
test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py
+2
-2
test/srt/test_eval_accuracy_mini.py
test/srt/test_eval_accuracy_mini.py
+2
-2
test/srt/test_input_embeddings.py
test/srt/test_input_embeddings.py
+2
-2
test/srt/test_json_constrained.py
test/srt/test_json_constrained.py
+2
-2
test/srt/test_large_max_new_tokens.py
test/srt/test_large_max_new_tokens.py
+2
-2
test/srt/test_matched_stop.py
test/srt/test_matched_stop.py
+2
-2
test/srt/test_metrics.py
test/srt/test_metrics.py
+2
-2
test/srt/test_mla.py
test/srt/test_mla.py
+2
-2
test/srt/test_mla_fp8.py
test/srt/test_mla_fp8.py
+2
-2
test/srt/test_moe_eval_accuracy_large.py
test/srt/test_moe_eval_accuracy_large.py
+2
-2
test/srt/test_nightly_gsm8k_eval.py
test/srt/test_nightly_gsm8k_eval.py
+2
-2
test/srt/test_nightly_human_eval.py
test/srt/test_nightly_human_eval.py
+3
-3
test/srt/test_openai_server.py
test/srt/test_openai_server.py
+2
-2
test/srt/test_pytorch_sampling_backend.py
test/srt/test_pytorch_sampling_backend.py
+2
-2
test/srt/test_radix_attention.py
test/srt/test_radix_attention.py
+2
-2
test/srt/test_retract_decode.py
test/srt/test_retract_decode.py
+2
-2
test/srt/test_session_control.py
test/srt/test_session_control.py
+3
-3
test/srt/test_skip_tokenizer_init.py
test/srt/test_skip_tokenizer_init.py
+2
-2
test/srt/test_srt_endpoint.py
test/srt/test_srt_endpoint.py
+2
-2
No files found.
test/srt/test_eval_accuracy_large_chunked_prefill.py
View file @
d4fc1a70
import
unittest
from
types
import
SimpleNamespace
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
...
...
@@ -25,7 +25,7 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
test_mmlu
(
self
):
args
=
SimpleNamespace
(
...
...
test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py
View file @
d4fc1a70
import
unittest
from
types
import
SimpleNamespace
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
...
...
@@ -31,7 +31,7 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
test_mmlu
(
self
):
args
=
SimpleNamespace
(
...
...
test/srt/test_eval_accuracy_mini.py
View file @
d4fc1a70
import
unittest
from
types
import
SimpleNamespace
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
...
...
@@ -22,7 +22,7 @@ class TestEvalAccuracyMini(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
test_mmlu
(
self
):
args
=
SimpleNamespace
(
...
...
test/srt/test_input_embeddings.py
View file @
d4fc1a70
...
...
@@ -4,7 +4,7 @@ import unittest
import
requests
from
transformers
import
AutoModelForCausalLM
,
AutoTokenizer
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.test_utils
import
(
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
...
...
@@ -107,7 +107,7 @@ class TestInputEmbeds(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
if
__name__
==
"__main__"
:
...
...
test/srt/test_json_constrained.py
View file @
d4fc1a70
...
...
@@ -9,7 +9,7 @@ from concurrent.futures import ThreadPoolExecutor
import
openai
import
requests
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
...
...
@@ -46,7 +46,7 @@ class TestJSONConstrainedOutlinesBackend(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
run_decode
(
self
,
json_schema
,
return_logprob
=
False
,
top_logprobs_num
=
0
,
n
=
1
):
response
=
requests
.
post
(
...
...
test/srt/test_large_max_new_tokens.py
View file @
d4fc1a70
...
...
@@ -10,7 +10,7 @@ from concurrent.futures import ThreadPoolExecutor
import
openai
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.test_utils
import
(
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
...
...
@@ -52,7 +52,7 @@ class TestLargeMaxNewTokens(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
cls
.
stdout
.
close
()
cls
.
stderr
.
close
()
os
.
remove
(
STDOUT_FILENAME
)
...
...
test/srt/test_matched_stop.py
View file @
d4fc1a70
...
...
@@ -3,7 +3,7 @@ import unittest
import
requests
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
...
...
@@ -32,7 +32,7 @@ class TestMatchedStop(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
run_completions_generation
(
self
,
...
...
test/srt/test_metrics.py
View file @
d4fc1a70
...
...
@@ -2,7 +2,7 @@ import unittest
import
requests
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.test_utils
import
(
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
...
...
@@ -75,7 +75,7 @@ class TestEnableMetrics(unittest.TestCase):
self
.
assertIn
(
"_bucket{"
,
metrics_content
)
finally
:
kill_
child_
process
(
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
process
.
pid
)
if
__name__
==
"__main__"
:
...
...
test/srt/test_mla.py
View file @
d4fc1a70
import
unittest
from
types
import
SimpleNamespace
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MLA_MODEL_NAME_FOR_TEST
,
...
...
@@ -25,7 +25,7 @@ class TestMLA(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
test_mmlu
(
self
):
args
=
SimpleNamespace
(
...
...
test/srt/test_mla_fp8.py
View file @
d4fc1a70
import
unittest
from
types
import
SimpleNamespace
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MLA_FP8_MODEL_NAME_FOR_TEST
,
...
...
@@ -31,7 +31,7 @@ class TestMLA(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
test_mgsm_en
(
self
):
args
=
SimpleNamespace
(
...
...
test/srt/test_moe_eval_accuracy_large.py
View file @
d4fc1a70
...
...
@@ -6,7 +6,7 @@ python -m unittest test_moe_eval_accuracy_large.TestMoEEvalAccuracyLarge.test_mm
import
unittest
from
types
import
SimpleNamespace
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MOE_MODEL_NAME_FOR_TEST
,
...
...
@@ -35,7 +35,7 @@ class TestMoEEvalAccuracyLarge(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
test_mmlu
(
self
):
args
=
SimpleNamespace
(
...
...
test/srt/test_nightly_gsm8k_eval.py
View file @
d4fc1a70
...
...
@@ -6,7 +6,7 @@ import warnings
from
datetime
import
datetime
from
types
import
SimpleNamespace
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1
,
...
...
@@ -132,7 +132,7 @@ class TestEvalAccuracyLarge(unittest.TestCase):
def
tearDown
(
self
):
if
self
.
process
:
kill_
child_
process
(
self
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
self
.
process
.
pid
)
def
test_mgsm_en_all_models
(
self
):
warnings
.
filterwarnings
(
...
...
test/srt/test_nightly_human_eval.py
View file @
d4fc1a70
...
...
@@ -6,7 +6,7 @@ import unittest
from
test_nightly_gsm8k_eval
import
launch_server
,
parse_models
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1
,
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2
,
...
...
@@ -32,9 +32,9 @@ class TestEvalAccuracyLarge(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
if
cls
.
process
:
kill_
child_
process
(
cls
.
process
.
pid
)
kill_process
_tree
(
cls
.
process
.
pid
)
if
cls
.
eval_process
:
kill_
child_
process
(
cls
.
eval_process
.
pid
)
kill_process
_tree
(
cls
.
eval_process
.
pid
)
def
run_evalplus
(
self
,
model
):
print
(
"Delete evalplus results"
)
...
...
test/srt/test_openai_server.py
View file @
d4fc1a70
...
...
@@ -11,7 +11,7 @@ import unittest
import
openai
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.test_utils
import
(
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
...
...
@@ -37,7 +37,7 @@ class TestOpenAIServer(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
run_completion
(
self
,
echo
,
logprobs
,
use_list_input
,
parallel_sample_num
,
token_input
...
...
test/srt/test_pytorch_sampling_backend.py
View file @
d4fc1a70
...
...
@@ -3,7 +3,7 @@ from types import SimpleNamespace
import
requests
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
...
...
@@ -27,7 +27,7 @@ class TestPyTorchSamplingBackend(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
test_mmlu
(
self
):
args
=
SimpleNamespace
(
...
...
test/srt/test_radix_attention.py
View file @
d4fc1a70
...
...
@@ -8,7 +8,7 @@ from sglang.test.test_utils import (
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
kill_
child_
process
,
kill_process
_tree
,
popen_launch_server
,
)
...
...
@@ -80,7 +80,7 @@ class TestRadixCacheFCFS(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
test_radix_attention
(
self
):
nodes
=
gen_radix_tree
()
...
...
test/srt/test_retract_decode.py
View file @
d4fc1a70
import
unittest
from
types
import
SimpleNamespace
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
...
...
@@ -22,7 +22,7 @@ class TestRetractDecode(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
test_mmlu
(
self
):
args
=
SimpleNamespace
(
...
...
test/srt/test_session_control.py
View file @
d4fc1a70
...
...
@@ -9,7 +9,7 @@ import unittest
import
requests
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.test_utils
import
(
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
...
...
@@ -29,7 +29,7 @@ class TestSessionControl(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
test_session_control
(
self
):
chunks
=
[
...
...
@@ -191,7 +191,7 @@ class TestSessionControlVision(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
test_session_control
(
self
):
text_chunks
=
[
...
...
test/srt/test_skip_tokenizer_init.py
View file @
d4fc1a70
...
...
@@ -7,7 +7,7 @@ import unittest
import
requests
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.test_utils
import
(
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
...
...
@@ -30,7 +30,7 @@ class TestSkipTokenizerInit(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
run_decode
(
self
,
return_logprob
=
False
,
top_logprobs_num
=
0
,
n
=
1
):
max_new_tokens
=
32
...
...
test/srt/test_srt_endpoint.py
View file @
d4fc1a70
...
...
@@ -9,7 +9,7 @@ import unittest
import
numpy
as
np
import
requests
from
sglang.srt.utils
import
kill_
child_
process
from
sglang.srt.utils
import
kill_process
_tree
from
sglang.test.test_utils
import
(
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
...
...
@@ -29,7 +29,7 @@ class TestSRTEndpoint(unittest.TestCase):
@
classmethod
def
tearDownClass
(
cls
):
kill_
child_
process
(
cls
.
process
.
pid
,
include_self
=
True
)
kill_process
_tree
(
cls
.
process
.
pid
)
def
run_decode
(
self
,
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment