Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
bf53bf51
Unverified
Commit
bf53bf51
authored
Aug 28, 2024
by
Lianmin Zheng
Committed by
GitHub
Aug 28, 2024
Browse files
[Fix] Fix llava on multi images (#1247)
parent
b1a540ec
Changes
22
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
35 deletions
+18
-35
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+18
-33
test/srt/test_vision_openai_server.py
test/srt/test_vision_openai_server.py
+0
-2
No files found.
python/sglang/srt/utils.py
View file @
bf53bf51
...
@@ -26,7 +26,7 @@ import struct
...
@@ -26,7 +26,7 @@ import struct
import
time
import
time
from
importlib.metadata
import
PackageNotFoundError
,
version
from
importlib.metadata
import
PackageNotFoundError
,
version
from
io
import
BytesIO
from
io
import
BytesIO
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
,
Union
import
numpy
as
np
import
numpy
as
np
import
psutil
import
psutil
...
@@ -193,35 +193,16 @@ def allocate_init_ports(
...
@@ -193,35 +193,16 @@ def allocate_init_ports(
return
ret_ports
[
0
],
ret_ports
[
1
:
num_ports_needed
]
return
ret_ports
[
0
],
ret_ports
[
1
:
num_ports_needed
]
def
get_int_token_logit_bias
(
tokenizer
,
vocab_size
):
def
is_multimodal_model
(
model_architectures
):
"""Get the logit bias for integer-only tokens."""
if
(
# a bug when model's vocab size > tokenizer.vocab_size
"LlavaLlamaForCausalLM"
in
model_architectures
if
tokenizer
==
None
:
or
"LlavaQwenForCausalLM"
in
model_architectures
return
[
-
1e5
]
*
vocab_size
or
"LlavaMistralForCausalLM"
in
model_architectures
vocab_size
=
tokenizer
.
vocab_size
or
"LlavaVidForCausalLM"
in
model_architectures
logit_bias
=
np
.
zeros
(
vocab_size
,
dtype
=
np
.
float32
)
):
for
t_id
in
range
(
vocab_size
):
return
True
ss
=
tokenizer
.
decode
([
t_id
]).
strip
()
else
:
if
not
(
ss
.
isdigit
()
or
len
(
ss
)
==
0
or
t_id
==
tokenizer
.
eos_token_id
):
return
False
logit_bias
[
t_id
]
=
-
1e5
return
logit_bias
def
is_multimodal_model
(
model
):
from
sglang.srt.model_config
import
ModelConfig
if
isinstance
(
model
,
str
):
model
=
model
.
lower
()
return
"llava"
in
model
or
"yi-vl"
in
model
or
"llava-next"
in
model
if
isinstance
(
model
,
ModelConfig
):
model_path
=
model
.
path
.
lower
()
return
(
"llava"
in
model_path
or
"yi-vl"
in
model_path
or
"llava-next"
in
model_path
)
raise
ValueError
(
"unrecognized type"
)
def
is_generation_model
(
model_architectures
,
is_embedding
:
bool
=
False
):
def
is_generation_model
(
model_architectures
,
is_embedding
:
bool
=
False
):
...
@@ -317,12 +298,14 @@ def decode_video_base64(video_base64):
...
@@ -317,12 +298,14 @@ def decode_video_base64(video_base64):
)
# Return an empty array and size tuple if no frames were found
)
# Return an empty array and size tuple if no frames were found
def
load_image
(
image_file
):
def
load_image
(
image_file
:
Union
[
str
,
bytes
]
):
from
PIL
import
Image
from
PIL
import
Image
image
=
image_size
=
None
image
=
image_size
=
None
if
image_file
.
startswith
(
"http://"
)
or
image_file
.
startswith
(
"https://"
):
if
isinstance
(
image_file
,
bytes
):
image
=
Image
.
open
(
BytesIO
(
image_file
))
elif
image_file
.
startswith
(
"http://"
)
or
image_file
.
startswith
(
"https://"
):
timeout
=
int
(
os
.
getenv
(
"REQUEST_TIMEOUT"
,
"3"
))
timeout
=
int
(
os
.
getenv
(
"REQUEST_TIMEOUT"
,
"3"
))
response
=
requests
.
get
(
image_file
,
timeout
=
timeout
)
response
=
requests
.
get
(
image_file
,
timeout
=
timeout
)
image
=
Image
.
open
(
BytesIO
(
response
.
content
))
image
=
Image
.
open
(
BytesIO
(
response
.
content
))
...
@@ -334,8 +317,10 @@ def load_image(image_file):
...
@@ -334,8 +317,10 @@ def load_image(image_file):
elif
image_file
.
startswith
(
"video:"
):
elif
image_file
.
startswith
(
"video:"
):
image_file
=
image_file
.
replace
(
"video:"
,
""
)
image_file
=
image_file
.
replace
(
"video:"
,
""
)
image
,
image_size
=
decode_video_base64
(
image_file
)
image
,
image_size
=
decode_video_base64
(
image_file
)
el
se
:
el
if
isinstance
(
image_file
,
str
)
:
image
=
Image
.
open
(
BytesIO
(
base64
.
b64decode
(
image_file
)))
image
=
Image
.
open
(
BytesIO
(
base64
.
b64decode
(
image_file
)))
else
:
raise
ValueError
(
f
"Invalid image:
{
image
}
"
)
return
image
,
image_size
return
image
,
image_size
...
...
test/srt/test_vision_openai_server.py
View file @
bf53bf51
...
@@ -32,8 +32,6 @@ class TestOpenAIVisionServer(unittest.TestCase):
...
@@ -32,8 +32,6 @@ class TestOpenAIVisionServer(unittest.TestCase):
other_args
=
[
other_args
=
[
"--chat-template"
,
"--chat-template"
,
"chatml-llava"
,
"chatml-llava"
,
"--chunked-prefill-size"
,
"16384"
,
# "--log-requests",
# "--log-requests",
],
],
)
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment