sglang · Commits · 7feba415

Fix failed ci tests on long prompts; Better error messages for embedding models (#1700)

Unverified commit 7feba415, authored Oct 17, 2024 by Lianmin Zheng; committed by GitHub on Oct 17, 2024.
Parent: 30ee3630
Showing 4 changed files with 24 additions and 13 deletions.
- python/sglang/srt/managers/io_struct.py (+3, -0)
- python/sglang/srt/managers/tokenizer_manager.py (+5, -1)
- python/sglang/srt/server.py (+0, -2)
- test/srt/models/test_generation_models.py (+16, -10)
python/sglang/srt/managers/io_struct.py

```diff
@@ -56,6 +56,9 @@ class GenerateReqInput:
     # LoRA related
     lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
 
+    # Whether it is a single request or a batch request
+    is_single: bool = True
+
     def post_init(self):
         if (self.text is None and self.input_ids is None) or (
             self.text is not None and self.input_ids is not None
```
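For context, `is_single` is what `TokenizerManager` reads after `post_init()` to route a request down the single or batch path. A minimal, hypothetical sketch of that behavior (the real `GenerateReqInput` has many more fields, and its `post_init` performs additional validation and normalization):

```python
# Hypothetical sketch only: the real GenerateReqInput in io_struct.py has many
# more fields, and its post_init does additional normalization.
from dataclasses import dataclass
from typing import List, Optional, Union


@dataclass
class GenerateReqInput:
    text: Optional[Union[List[str], str]] = None
    input_ids: Optional[Union[List[List[int]], List[int]]] = None
    # Whether it is a single request or a batch request
    is_single: bool = True

    def post_init(self):
        if (self.text is None and self.input_ids is None) or (
            self.text is not None and self.input_ids is not None
        ):
            raise ValueError("Provide exactly one of text or input_ids.")
        # A batch request passes a list of prompts rather than one string.
        if self.text is not None:
            self.is_single = isinstance(self.text, str)
        else:
            self.is_single = len(self.input_ids) > 0 and not isinstance(
                self.input_ids[0], list
            )


req = GenerateReqInput(text=["prompt A", "prompt B"])
req.post_init()
print(req.is_single)  # False: a list of prompts is a batch request
```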
python/sglang/srt/managers/tokenizer_manager.py

```diff
@@ -150,9 +150,13 @@ class TokenizerManager:
         while self.model_update_lock.locked():
             await asyncio.sleep(0.001)
 
+        if isinstance(obj, EmbeddingReqInput) and self.is_generation:
+            raise ValueError(
+                "This model does not appear to be an embedding model by default. Please add `--is-embedding` when launching the server or try another model."
+            )
+
         obj.post_init()
         is_single = obj.is_single
 
         if is_single:
             async for response in self._handle_single_request(obj, request):
                 yield response
```
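This check turns a confusing downstream failure into an actionable error when an embedding request reaches a server running in generation mode. A stand-in sketch of the guard in isolation (the class shapes and the `validate` method name here are hypothetical; only the error message and the `--is-embedding` flag come from the diff):

```python
# Stand-in sketch of the guard; the real TokenizerManager derives is_generation
# from the loaded model's configuration. The fix suggested by the error is to
# relaunch with the flag named in the message, e.g.:
#
#   python -m sglang.launch_server --model-path <embedding-model> --is-embedding


class EmbeddingReqInput:
    pass


class TokenizerManager:
    def __init__(self, is_generation: bool):
        # True when the server was launched with a generation model
        self.is_generation = is_generation

    def validate(self, obj):
        if isinstance(obj, EmbeddingReqInput) and self.is_generation:
            raise ValueError(
                "This model does not appear to be an embedding model by default. "
                "Please add `--is-embedding` when launching the server or try another model."
            )


manager = TokenizerManager(is_generation=True)
try:
    manager.validate(EmbeddingReqInput())
except ValueError as e:
    print(e)
```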
python/sglang/srt/server.py

```diff
@@ -542,8 +542,6 @@ def _wait_and_warmup(server_args, pipe_finish_writer, pid):
             kill_child_process(pid, including_parent=False)
             return
 
-    print(f"{res.json()=}")
-
     logger.info("The server is fired up and ready to roll!")
     if pipe_finish_writer is not None:
         pipe_finish_writer.send("ready")
```
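The surviving lines implement a readiness handshake: after warmup, the child signals the parent over a pipe. A small sketch of that pattern, using `multiprocessing.Pipe` as a stand-in for the server's actual plumbing:

```python
# Sketch of the pipe-based readiness handshake, not sglang's actual wiring.
import multiprocessing as mp


def wait_and_warmup(pipe_finish_writer):
    # ... send warmup requests to the server here ...
    if pipe_finish_writer is not None:
        pipe_finish_writer.send("ready")


if __name__ == "__main__":
    reader, writer = mp.Pipe(duplex=False)
    proc = mp.Process(target=wait_and_warmup, args=(writer,))
    proc.start()
    print(reader.recv())  # blocks until the child reports "ready"
    proc.join()
```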
test/srt/models/test_generation_models.py

```diff
@@ -40,20 +40,23 @@ class ModelCase:
     prefill_tolerance: float = 5e-2
     decode_tolerance: float = 5e-2
     rouge_l_tolerance: float = 1
+    skip_long_prompt: bool = False
 
 
 # Popular models that run on the CI
 CI_MODELS = [
     ModelCase("meta-llama/Llama-3.1-8B-Instruct"),
-    ModelCase("google/gemma-2-2b"),
+    ModelCase("google/gemma-2-2b", skip_long_prompt=True),
+    # There is a bug with new transformers library. This can only run with transformers==4.44
 ]
 
 # All other models that do not run on the CI
 ALL_OTHER_MODELS = [
     ModelCase("Qwen/Qwen2-1.5B"),
     ModelCase("Qwen/Qwen2.5-14B-Instruct"),
-    ModelCase("HuggingFaceTB/SmolLM-135M-Instruct"),
-    ModelCase("allenai/OLMo-1B-0724-hf", decode_tolerance=8e-2),
+    ModelCase("HuggingFaceTB/SmolLM-135M-Instruct", skip_long_prompt=True),
+    ModelCase("allenai/OLMo-1B-0724-hf", decode_tolerance=8e-2, skip_long_prompt=True),
 ]
 
 TORCH_DTYPES = [torch.float16]

@@ -136,8 +139,15 @@ class TestGenerationModels(unittest.TestCase):
     def test_ci_models(self):
         for model_case in CI_MODELS:
             for torch_dtype in TORCH_DTYPES:
+                # Skip long prompts for models that do not have a long context
+                prompts = DEFAULT_PROMPTS
+                if model_case.skip_long_prompt:
+                    prompts = [p for p in DEFAULT_PROMPTS if len(p) < 1000]
+
                 # Assert the logits and output strs are close
                 self.assert_close_logits_and_output_strs(
-                    DEFAULT_PROMPTS, model_case, torch_dtype
+                    prompts, model_case, torch_dtype
                 )
 
     def test_others(self):

@@ -152,13 +162,9 @@ class TestGenerationModels(unittest.TestCase):
                 ):
                     continue
 
-                # Skip long prompts for models that does not have a long context
+                # Skip long prompts for models that do not have a long context
                 prompts = DEFAULT_PROMPTS
-                if model_case.model_path in [
-                    "HuggingFaceTB/SmolLM-135M-Instruct",
-                    "allenai/OLMo-1B-0724-hf",
-                    "google/gemma-2-2b",
-                    # There is a bug with new transformers library. This can only run with transformers==4.44
-                ]:
+                if model_case.skip_long_prompt:
                     prompts = [p for p in DEFAULT_PROMPTS if len(p) < 1000]
 
                 # Assert the logits and output strs are close
```
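The same per-model flag now backs both `test_ci_models` and `test_others`, replacing the hard-coded model-path list. A self-contained sketch of the filtering behavior, with stand-in prompt values:

```python
# Self-contained sketch; the DEFAULT_PROMPTS values below are stand-ins.
from dataclasses import dataclass


@dataclass
class ModelCase:
    model_path: str
    prefill_tolerance: float = 5e-2
    decode_tolerance: float = 5e-2
    rouge_l_tolerance: float = 1
    skip_long_prompt: bool = False


DEFAULT_PROMPTS = ["a short prompt", "x" * 2000]  # stand-in values

case = ModelCase("HuggingFaceTB/SmolLM-135M-Instruct", skip_long_prompt=True)
prompts = DEFAULT_PROMPTS
if case.skip_long_prompt:
    # Keep only prompts that fit a short context window
    prompts = [p for p in DEFAULT_PROMPTS if len(p) < 1000]
print(len(prompts))  # 1: the 2000-character prompt was skipped
```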