Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
8690c40b
Unverified
Commit
8690c40b
authored
Mar 29, 2025
by
fzyzcjy
Committed by
GitHub
Mar 29, 2025
Browse files
Improve stack trace of retry errors (#4845)
parent
b1cfb4e9
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
36 additions
and
22 deletions
+36
-22
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+30
-0
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+6
-22
No files found.
python/sglang/srt/utils.py
View file @
8690c40b
...
...
@@ -35,6 +35,7 @@ import sys
import
tempfile
import
threading
import
time
import
traceback
import
warnings
from
contextlib
import
contextmanager
from
functools
import
lru_cache
...
...
@@ -1766,3 +1767,32 @@ def parse_connector_type(url: str) -> str:
return
""
return
m
.
group
(
1
)
def retry(
    fn,
    max_retry: int,
    initial_delay: float = 2.0,
    max_delay: float = 60.0,
    should_retry: Callable[[Any], bool] = lambda e: True,
):
    """Call ``fn()`` until it succeeds, backing off exponentially between tries.

    Args:
        fn: Zero-argument callable to invoke.
        max_retry: Number of retries allowed after the first attempt, so
            ``fn`` is called at most ``max_retry + 1`` times.
        initial_delay: Base delay in seconds before the first retry; it is
            doubled on every subsequent retry.
        max_delay: Upper bound in seconds applied to the doubled delay before
            jitter is applied.
        should_retry: Predicate receiving the caught exception; returning
            ``False`` aborts immediately instead of retrying.

    Returns:
        Whatever ``fn()`` returns on its first successful call.

    Raises:
        Exception: When the retry budget is exhausted or ``should_retry``
            rejects the error. The triggering error is attached as
            ``__cause__`` so the real failure shows up in the stack trace.
    """
    for try_index in itertools.count():
        try:
            return fn()
        except Exception as e:
            if try_index >= max_retry:
                # Chain explicitly so the root cause is not hidden behind the
                # generic retry failure.
                raise Exception("retry() exceed maximum number of retries.") from e

            if not should_retry(e):
                raise Exception(
                    "retry() observe errors that should not be retried."
                ) from e

            # Exponential backoff capped at max_delay, scaled by a random
            # factor in [0.75, 1.0) so concurrent callers do not retry in
            # lockstep.
            delay = min(initial_delay * (2**try_index), max_delay) * (
                0.75 + 0.25 * random.random()
            )

            logger.warning(
                f"retry() failed once ({try_index}th try, maximum {max_retry} retries). Will delay {delay:.2f}s and retry. Error: {e}"
            )
            # Print the full traceback of the failed attempt; the warning
            # above only carries str(e).
            traceback.print_exc()

            time.sleep(delay)
python/sglang/test/test_utils.py
View file @
8690c40b
...
...
@@ -25,7 +25,7 @@ from sglang.bench_serving import run_benchmark
from
sglang.global_config
import
global_config
from
sglang.lang.backend.openai
import
OpenAI
from
sglang.lang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.srt.utils
import
get_bool_env_var
,
kill_process_tree
from
sglang.srt.utils
import
get_bool_env_var
,
kill_process_tree
,
retry
from
sglang.test.run_eval
import
run_eval
from
sglang.utils
import
get_exception_traceback
...
...
@@ -1010,26 +1010,10 @@ def run_logprob_check(self: unittest.TestCase, arg: Tuple):
class
CustomTestCase
(
unittest
.
TestCase
):
def
_callTestMethod
(
self
,
method
):
_retry_execution
(
lambda
:
super
(
CustomTestCase
,
self
).
_callTestMethod
(
method
),
max_retry
=
_get_max_retry
(),
max_retry
=
int
(
os
.
environ
.
get
(
"SGLANG_TEST_MAX_RETRY"
,
"2"
if
is_in_ci
()
else
"0"
)
)
def
_get_max_retry
():
return
int
(
os
.
environ
.
get
(
"SGLANG_TEST_MAX_RETRY"
,
"2"
if
is_in_ci
()
else
"0"
))
def
_retry_execution
(
fn
,
max_retry
:
int
):
if
max_retry
==
0
:
fn
()
return
try
:
fn
()
except
Exception
as
e
:
print
(
f
"retry_execution failed once and will retry. This may be an error or a flaky test. Error:
{
e
}
"
retry
(
lambda
:
super
(
CustomTestCase
,
self
).
_callTestMethod
(
method
),
max_retry
=
max_retry
,
)
traceback
.
print_exc
()
_retry_execution
(
fn
,
max_retry
=
max_retry
-
1
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment