Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
7feba415
Unverified
Commit
7feba415
authored Oct 17, 2024 by Lianmin Zheng
Committed by
GitHub
Oct 17, 2024
Browse files
Fix failed ci tests on long prompts; Better error messages for embedding models (#1700)
parent
30ee3630
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing 4 changed files with 24 additions and 13 deletions
+24
-13
python/sglang/srt/managers/io_struct.py
python/sglang/srt/managers/io_struct.py
+3
-0
python/sglang/srt/managers/tokenizer_manager.py
python/sglang/srt/managers/tokenizer_manager.py
+5
-1
python/sglang/srt/server.py
python/sglang/srt/server.py
+0
-2
test/srt/models/test_generation_models.py
test/srt/models/test_generation_models.py
+16
-10
No files found.
python/sglang/srt/managers/io_struct.py
View file @
7feba415
...
...
@@ -56,6 +56,9 @@ class GenerateReqInput:
# LoRA related
lora_path
:
Optional
[
Union
[
List
[
Optional
[
str
]],
Optional
[
str
]]]
=
None
# Whether it is a single request or a batch request
is_single
:
bool
=
True
def
post_init
(
self
):
if
(
self
.
text
is
None
and
self
.
input_ids
is
None
)
or
(
self
.
text
is
not
None
and
self
.
input_ids
is
not
None
...
...
python/sglang/srt/managers/tokenizer_manager.py
View file @
7feba415
...
...
@@ -150,9 +150,13 @@ class TokenizerManager:
while
self
.
model_update_lock
.
locked
():
await
asyncio
.
sleep
(
0.001
)
if
isinstance
(
obj
,
EmbeddingReqInput
)
and
self
.
is_generation
:
raise
ValueError
(
"This model does not appear to be an embedding model by default. Please add `--is-embedding` when launching the server or try another model."
)
obj
.
post_init
()
is_single
=
obj
.
is_single
if
is_single
:
async
for
response
in
self
.
_handle_single_request
(
obj
,
request
):
yield
response
...
...
python/sglang/srt/server.py
View file @
7feba415
...
...
@@ -542,8 +542,6 @@ def _wait_and_warmup(server_args, pipe_finish_writer, pid):
kill_child_process
(
pid
,
including_parent
=
False
)
return
print
(
f
"
{
res
.
json
()
=
}
"
)
logger
.
info
(
"The server is fired up and ready to roll!"
)
if
pipe_finish_writer
is
not
None
:
pipe_finish_writer
.
send
(
"ready"
)
...
...
test/srt/models/test_generation_models.py
View file @
7feba415
...
...
@@ -40,20 +40,23 @@ class ModelCase:
prefill_tolerance
:
float
=
5e-2
decode_tolerance
:
float
=
5e-2
rouge_l_tolerance
:
float
=
1
skip_long_prompt
:
bool
=
False
# Popular models that run on the CI
CI_MODELS
=
[
ModelCase
(
"meta-llama/Llama-3.1-8B-Instruct"
),
ModelCase
(
"google/gemma-2-2b"
),
ModelCase
(
"google/gemma-2-2b"
,
skip_long_prompt
=
True
),
# There is a bug with new transformers library. This can only run with transformers==4.44
]
# All other models that do not run on the CI
ALL_OTHER_MODELS
=
[
ModelCase
(
"Qwen/Qwen2-1.5B"
),
ModelCase
(
"Qwen/Qwen2.5-14B-Instruct"
),
ModelCase
(
"HuggingFaceTB/SmolLM-135M-Instruct"
),
ModelCase
(
"allenai/OLMo-1B-0724-hf"
,
decode_tolerance
=
8e-2
),
ModelCase
(
"HuggingFaceTB/SmolLM-135M-Instruct"
,
skip_long_prompt
=
True
),
ModelCase
(
"allenai/OLMo-1B-0724-hf"
,
decode_tolerance
=
8e-2
,
skip_long_prompt
=
True
),
]
TORCH_DTYPES
=
[
torch
.
float16
]
...
...
@@ -136,8 +139,15 @@ class TestGenerationModels(unittest.TestCase):
def
test_ci_models
(
self
):
for
model_case
in
CI_MODELS
:
for
torch_dtype
in
TORCH_DTYPES
:
# Skip long prompts for models that do not have a long context
prompts
=
DEFAULT_PROMPTS
if
model_case
.
skip_long_prompt
:
prompts
=
[
p
for
p
in
DEFAULT_PROMPTS
if
len
(
p
)
<
1000
]
# Assert the logits and output strs are close
self
.
assert_close_logits_and_output_strs
(
DEFAULT_PROMPTS
,
model_case
,
torch_dtype
prompts
,
model_case
,
torch_dtype
)
def
test_others
(
self
):
...
...
@@ -152,13 +162,9 @@ class TestGenerationModels(unittest.TestCase):
):
continue
# Skip long prompts for models that does not have a long context
# Skip long prompts for models that do not have a long context
prompts
=
DEFAULT_PROMPTS
if
model_case
.
model_path
in
[
"HuggingFaceTB/SmolLM-135M-Instruct"
,
"allenai/OLMo-1B-0724-hf"
,
"google/gemma-2-2b"
,
# There is a bug with new transformers library. This can only run with transformers==4.44
]:
if
model_case
.
skip_long_prompt
:
prompts
=
[
p
for
p
in
DEFAULT_PROMPTS
if
len
(
p
)
<
1000
]
# Assert the logits and output strs are close
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.