Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
8bd26dd4
Unverified
Commit
8bd26dd4
authored
Oct 24, 2025
by
Mick
Committed by
GitHub
Oct 23, 2025
Browse files
ci: fix night-ci with push retry mechanism (#11765)
parent
ab07cd3e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
67 additions
and
39 deletions
+67
-39
scripts/ci/publish_traces.py
scripts/ci/publish_traces.py
+66
-38
test/srt/test_nightly_vlms_mmmu_eval.py
test/srt/test_nightly_vlms_mmmu_eval.py
+1
-1
No files found.
scripts/ci/publish_traces.py
View file @
8bd26dd4
...
...
@@ -7,6 +7,8 @@ import base64
import
json
import
os
import
sys
import
time
from
urllib.error
import
HTTPError
from
urllib.request
import
Request
,
urlopen
...
...
@@ -28,14 +30,17 @@ def make_github_request(url, token, method="GET", data=None):
try
:
with
urlopen
(
req
)
as
response
:
return
response
.
read
().
decode
(
"utf-8"
)
except
Exception
as
e
:
except
HTTPError
as
e
:
print
(
f
"GitHub API request failed:
{
e
}
"
)
if
hasattr
(
e
,
"read"
):
try
:
error_body
=
e
.
read
().
decode
(
"utf-8"
)
print
(
f
"Error response body:
{
error_body
}
"
)
except
:
pass
try
:
error_body
=
e
.
read
().
decode
(
"utf-8"
)
print
(
f
"Error response body:
{
error_body
}
"
)
e
.
error_body
=
error_body
# Attach for later inspection
except
Exception
:
e
.
error_body
=
""
raise
except
Exception
as
e
:
print
(
f
"GitHub API request failed with a non-HTTP error:
{
e
}
"
)
raise
...
...
@@ -196,37 +201,60 @@ def publish_traces(traces_dir, run_id, run_number, is_vlm=False):
)
sys
.
exit
(
1
)
try
:
# Get current branch head
branch_sha
=
get_branch_sha
(
repo_owner
,
repo_name
,
branch
,
token
)
print
(
f
"Current branch head:
{
branch_sha
}
"
)
# Get current tree
tree_sha
=
get_tree_sha
(
repo_owner
,
repo_name
,
branch_sha
,
token
)
print
(
f
"Current tree SHA:
{
tree_sha
}
"
)
# Create new tree with all files
new_tree_sha
=
create_tree
(
repo_owner
,
repo_name
,
tree_sha
,
files_to_upload
,
token
)
print
(
f
"Created new tree:
{
new_tree_sha
}
"
)
# Create commit
commit_message
=
f
"Nightly traces for run
{
run_id
}
at
{
run_number
}
(
{
len
(
files_to_upload
)
}
files)"
commit_sha
=
create_commit
(
repo_owner
,
repo_name
,
new_tree_sha
,
branch_sha
,
commit_message
,
token
)
print
(
f
"Created commit:
{
commit_sha
}
"
)
# Update branch reference
update_branch_ref
(
repo_owner
,
repo_name
,
branch
,
commit_sha
,
token
)
print
(
"Updated branch reference"
)
print
(
"Successfully published all traces in a single commit"
)
except
Exception
as
e
:
print
(
f
"Failed to publish traces:
{
e
}
"
)
raise
max_retries
=
5
retry_delay
=
5
# seconds
for
attempt
in
range
(
max_retries
):
try
:
# Get current branch head
branch_sha
=
get_branch_sha
(
repo_owner
,
repo_name
,
branch
,
token
)
print
(
f
"Current branch head:
{
branch_sha
}
"
)
# Get current tree
tree_sha
=
get_tree_sha
(
repo_owner
,
repo_name
,
branch_sha
,
token
)
print
(
f
"Current tree SHA:
{
tree_sha
}
"
)
# Create new tree with all files
new_tree_sha
=
create_tree
(
repo_owner
,
repo_name
,
tree_sha
,
files_to_upload
,
token
)
print
(
f
"Created new tree:
{
new_tree_sha
}
"
)
# Create commit
commit_message
=
f
"Nightly traces for run
{
run_id
}
at
{
run_number
}
(
{
len
(
files_to_upload
)
}
files)"
commit_sha
=
create_commit
(
repo_owner
,
repo_name
,
new_tree_sha
,
branch_sha
,
commit_message
,
token
,
)
print
(
f
"Created commit:
{
commit_sha
}
"
)
# Update branch reference
update_branch_ref
(
repo_owner
,
repo_name
,
branch
,
commit_sha
,
token
)
print
(
"Updated branch reference"
)
print
(
"Successfully published all traces in a single commit"
)
return
except
Exception
as
e
:
is_ff_error
=
False
if
(
hasattr
(
e
,
"error_body"
)
and
"Update is not a fast forward"
in
e
.
error_body
):
is_ff_error
=
True
if
is_ff_error
and
attempt
<
max_retries
-
1
:
print
(
f
"Attempt
{
attempt
+
1
}
failed: not a fast-forward update. Retrying in
{
retry_delay
}
seconds..."
)
time
.
sleep
(
retry_delay
)
else
:
print
(
f
"Failed to publish traces:
{
e
}
"
)
raise
def
main
():
...
...
test/srt/test_nightly_vlms_mmmu_eval.py
View file @
8bd26dd4
...
...
@@ -37,7 +37,7 @@ MODEL_THRESHOLDS = {
ModelLaunchSettings
(
"Qwen/Qwen2.5-VL-7B-Instruct"
):
ModelEvalMetrics
(
0.340
,
31.9
),
ModelLaunchSettings
(
"Qwen/Qwen3-VL-30B-A3B-Instruct"
,
extra_args
=
[
"--tp=2"
]
):
ModelEvalMetrics
(
0.29
,
29.1
),
):
ModelEvalMetrics
(
0.29
,
37.0
),
ModelLaunchSettings
(
"unsloth/Mistral-Small-3.1-24B-Instruct-2503"
):
ModelEvalMetrics
(
0.310
,
16.7
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment