Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
9ecb1856
"tests/vscode:/vscode.git/clone" did not exist on "bdd2544673245f4400ea54d8fde071227189ebeb"
Unverified
Commit 9ecb1856 authored Jun 08, 2025 by Lianmin Zheng
Committed by GitHub Jun 08, 2025
Browse files
Fix triton sliding window test case (#6981)
parent
cc74499d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 15 additions and 18 deletions
+15
-18
test/srt/test_triton_sliding_window.py
test/srt/test_triton_sliding_window.py
+15
-18
No files found.
test/srt/test_triton_sliding_window.py
View file @
9ecb1856
import
time
import
unittest
from
types
import
SimpleNamespace
...
...
@@ -10,6 +9,7 @@ from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
is_in_ci
,
popen_launch_server
,
)
...
...
@@ -45,10 +45,6 @@ class TestSlidingWindowAttentionTriton(CustomTestCase):
)
cls
.
long_context_prompt
+=
"
\n
Now, summarize the story in one sentence:"
@
classmethod
def
tearDownClass
(
cls
):
pass
def
_test_mmlu
(
self
):
args
=
SimpleNamespace
(
base_url
=
self
.
base_url
,
...
...
@@ -61,7 +57,7 @@ class TestSlidingWindowAttentionTriton(CustomTestCase):
metrics
=
run_eval
(
args
)
print
(
f
"MMLU metrics with sliding window:
{
metrics
}
"
)
self
.
assertGreaterEqual
(
metrics
[
"score"
],
0.6
1
)
self
.
assertGreaterEqual
(
metrics
[
"score"
],
0.6
0
)
def
_test_short_context_generation
(
self
):
response
=
requests
.
post
(
...
...
@@ -97,6 +93,7 @@ class TestSlidingWindowAttentionTriton(CustomTestCase):
self
.
assertGreater
(
len
(
result
[
"text"
].
strip
()),
0
)
print
(
f
"Long context generation result:
{
result
[
'text'
][:
100
]
}
..."
)
@
unittest
.
skipIf
(
is_in_ci
(),
"To reduce the CI execution time."
)
def
test_no_cuda_graph
(
self
):
self
.
no_cuda_graph_process
=
popen_launch_server
(
self
.
model
,
...
...
@@ -105,12 +102,12 @@ class TestSlidingWindowAttentionTriton(CustomTestCase):
other_args
=
self
.
common_args
+
[
"--disable-cuda-graph"
],
)
self
.
_test_short_context_generation
()
self
.
_test_
long
_context_generation
()
self
.
_test_
mmlu
()
kill_process_tree
(
self
.
no_cuda_graph_process
.
pid
)
time
.
sleep
(
5
)
try
:
self
.
_test_
short
_context_generation
()
self
.
_test_
long_context_generation
()
self
.
_test_mmlu
()
finally
:
kill_process_tree
(
self
.
no_cuda_graph_process
.
pid
)
def
test_cuda_graph
(
self
):
self
.
cuda_graph_process
=
popen_launch_server
(
...
...
@@ -120,12 +117,12 @@ class TestSlidingWindowAttentionTriton(CustomTestCase):
other_args
=
self
.
common_args
,
)
self
.
_test_short_context_generation
()
self
.
_test_
long
_context_generation
()
self
.
_test_
mmlu
()
kill_process_tree
(
self
.
cuda_graph_process
.
pid
)
time
.
sleep
(
5
)
try
:
self
.
_test_
short
_context_generation
()
self
.
_test_
long_context_generation
()
self
.
_test_mmlu
()
finally
:
kill_process_tree
(
self
.
cuda_graph_process
.
pid
)
if
__name__
==
"__main__"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment