Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
2373faa3
"vscode:/vscode.git/clone" did not exist on "b474a00a96f61c9082250708229d2a9845986234"
Unverified
Commit
2373faa3
authored
Jun 27, 2025
by
Lifu Huang
Committed by
GitHub
Jun 27, 2025
Browse files
Fix flakiness in LoRA batch test. (#7552)
parent
9efb2993
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
44 additions
and
56 deletions
+44
-56
python/sglang/test/runners.py
python/sglang/test/runners.py
+2
-0
test/srt/models/lora/test_lora.py
test/srt/models/lora/test_lora.py
+41
-55
test/srt/run_suite.py
test/srt/run_suite.py
+1
-1
No files found.
python/sglang/test/runners.py
View file @
2373faa3
...
...
@@ -503,6 +503,7 @@ class SRTRunner:
disable_overlap_schedule
:
bool
=
False
,
disable_custom_all_reduce
:
bool
=
False
,
torchao_config
:
Optional
[
str
]
=
None
,
sleep_on_idle
=
False
,
):
self
.
model_type
=
model_type
self
.
is_generation
=
model_type
==
"generation"
...
...
@@ -540,6 +541,7 @@ class SRTRunner:
disable_overlap_schedule
=
disable_overlap_schedule
,
cuda_graph_max_bs
=
4
,
disable_custom_all_reduce
=
disable_custom_all_reduce
,
sleep_on_idle
=
sleep_on_idle
,
**
spec_kwargs
,
)
...
...
test/srt/models/lora/test_lora.py
View file @
2373faa3
...
...
@@ -18,6 +18,7 @@ import random
import
unittest
from
typing
import
List
import
torch
from
utils
import
(
ALL_OTHER_MULTI_LORA_MODELS
,
CI_MULTI_LORA_MODELS
,
...
...
@@ -46,7 +47,7 @@ TEST_MULTIPLE_BATCH_PROMPTS = [
The Transformers are large language models,
They're used to make predictions on text.
"""
,
#
"AI is a field of computer science focused on",
TODO: Add it back after fixing its bug
"AI is a field of computer science focused on"
,
"Computer science is the study of"
,
"Write a short story."
,
"What are the main components of a computer?"
,
...
...
@@ -54,8 +55,36 @@ TEST_MULTIPLE_BATCH_PROMPTS = [
class
TestLoRA
(
CustomTestCase
):
def
_create_test_samples
(
self
,
lora_adapter_paths
:
List
[
str
],
repeated_trials
:
int
=
3
):
random
.
seed
(
42
)
# Ensure reproducibility
patterns
=
[
[
None
,
lora_adapter_paths
[
0
],
lora_adapter_paths
[
1
]],
[
lora_adapter_paths
[
0
],
None
,
lora_adapter_paths
[
1
]],
[
lora_adapter_paths
[
0
],
lora_adapter_paths
[
1
],
None
],
[
None
,
lora_adapter_paths
[
1
],
None
],
[
None
,
None
,
None
],
]
batches
=
[
[
random
.
choice
(
pattern
)
for
_
in
range
(
3
)]
for
pattern
in
patterns
for
_
in
range
(
repeated_trials
)
]
return
batches
def
ensure_reproducibility
(
self
):
seed
=
42
random
.
seed
(
seed
)
torch
.
manual_seed
(
seed
)
torch
.
cuda
.
manual_seed_all
(
seed
)
torch
.
use_deterministic_algorithms
(
True
)
def
_run_lora_multiple_batch_on_model_cases
(
self
,
model_cases
:
List
[
LoRAModelCase
]):
for
model_case
in
model_cases
:
for
torch_dtype
in
TORCH_DTYPES
:
max_new_tokens
=
32
...
...
@@ -64,57 +93,6 @@ class TestLoRA(CustomTestCase):
lora_adapter_paths
=
[
a
.
name
for
a
in
model_case
.
adaptors
]
assert
len
(
lora_adapter_paths
)
>=
2
batches
=
[
(
[
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
],
[
None
,
lora_adapter_paths
[
0
],
lora_adapter_paths
[
1
],
],
),
(
[
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
],
[
lora_adapter_paths
[
0
],
None
,
lora_adapter_paths
[
1
],
],
),
(
[
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
],
[
lora_adapter_paths
[
0
],
lora_adapter_paths
[
1
],
None
],
),
(
[
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
],
[
None
,
lora_adapter_paths
[
1
],
None
],
),
(
[
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
),
],
[
None
,
None
,
None
],
),
]
print
(
f
"
\n
========== Testing multiple batches on base '
{
base_path
}
' with backend=
{
backend
}
, dtype=
{
torch_dtype
}
---"
)
...
...
@@ -128,23 +106,31 @@ class TestLoRA(CustomTestCase):
max_loras_per_batch
=
len
(
lora_adapter_paths
)
+
1
,
lora_backend
=
backend
,
disable_radix_cache
=
True
,
sleep_on_idle
=
True
,
# Eliminate non-determinism by forcing all requests to be processed in one batch.
attention_backend
=
"torch_native"
,
)
hf_runner
=
HFRunner
(
base_path
,
torch_dtype
=
torch_dtype
,
model_type
=
"generation"
)
batches
=
self
.
_create_test_samples
(
lora_adapter_paths
)
with
srt_runner
,
hf_runner
:
for
i
,
(
prompts
,
lora_paths
)
in
enumerate
(
batches
):
for
i
,
lora_paths
in
enumerate
(
batches
,
start
=
1
):
prompts
=
[
random
.
choice
(
TEST_MULTIPLE_BATCH_PROMPTS
)
for
_
in
range
(
3
)
]
print
(
f
"
\n
--- Running Batch
{
i
+
1
}
--- prompts:
{
prompts
}
, lora_paths:
{
lora_paths
}
"
f
"
\n
--- Running Batch
{
i
}
--- prompts:
{
prompts
}
, lora_paths:
{
lora_paths
}
"
)
self
.
ensure_reproducibility
()
srt_outputs
=
srt_runner
.
batch_forward
(
prompts
,
max_new_tokens
=
max_new_tokens
,
lora_paths
=
lora_paths
,
)
self
.
ensure_reproducibility
()
hf_outputs
=
hf_runner
.
forward
(
prompts
,
max_new_tokens
=
max_new_tokens
,
...
...
@@ -167,7 +153,7 @@ class TestLoRA(CustomTestCase):
f
"for base '
{
base_path
}
', adaptor '
{
lora_paths
}
', backend '
{
backend
}
', prompt: '
{
prompts
}
...'"
)
print
(
f
"--- Batch
{
i
+
1
}
Comparison Passed --- "
)
print
(
f
"--- Batch
{
i
}
Comparison Passed --- "
)
def
test_ci_lora_models
(
self
):
self
.
_run_lora_multiple_batch_on_model_cases
(
CI_MULTI_LORA_MODELS
)
...
...
test/srt/run_suite.py
View file @
2373faa3
...
...
@@ -13,7 +13,7 @@ class TestFile:
suites
=
{
"per-commit"
:
[
TestFile
(
"models/lora/test_lora.py"
,
76
),
TestFile
(
"models/lora/test_lora.py"
,
200
),
TestFile
(
"models/lora/test_lora_backend.py"
,
99
),
TestFile
(
"models/lora/test_multi_lora_backend.py"
,
60
),
TestFile
(
"models/lora/test_lora_cuda_graph.py"
,
250
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment