sglang · commit 55b974f9 (unverified)
Authored Sep 29, 2024 by Liangsheng Yin; committed by GitHub on Sep 29, 2024.

Process image in parallel (#1539)
Parent: f86c1e61
Showing 2 changed files with 204 additions and 147 deletions:
python/sglang/srt/managers/image_processor.py (+187, -0)
python/sglang/srt/managers/tokenizer_manager.py (+17, -147)
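The change moves LLaVA image preprocessing out of TokenizerManager into a new image_processor module, where each image is handed to a ProcessPoolExecutor worker and the per-image futures are awaited together, so the images in one request are preprocessed in parallel rather than one after another. A minimal, self-contained sketch of that pattern (the function and variable names below are illustrative and not part of the diff):

import asyncio
import concurrent.futures


def preprocess_one(image_bytes: bytes) -> int:
    # Stand-in for the CPU-bound HuggingFace image preprocessing step.
    return len(image_bytes)


async def preprocess_all(images: list) -> list:
    # Fan the images out to worker processes and await all results at once.
    loop = asyncio.get_event_loop()
    with concurrent.futures.ProcessPoolExecutor() as pool:
        futures = [loop.run_in_executor(pool, preprocess_one, img) for img in images]
        return await asyncio.gather(*futures)


if __name__ == "__main__":
    print(asyncio.run(preprocess_all([b"img-1", b"img-2", b"img-3"])))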
python/sglang/srt/managers/image_processor.py (new file, 0 → 100644)
# TODO: also move pad_input_ids into this module
import asyncio
import concurrent.futures
import logging
import multiprocessing as mp
import os
from abc import ABC, abstractmethod
from typing import List, Optional, Union

import numpy as np
import transformers

from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.mm_utils import expand2square, process_anyres_image
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import load_image
from sglang.utils import get_exception_traceback

logger = logging.getLogger(__name__)

global global_processor


def init_global_processor(server_args: ServerArgs):
    """Init the global processor for multi modal models."""
    global global_processor
    transformers.logging.set_verbosity_error()
    global_processor = get_processor(
        server_args.tokenizer_path,
        tokenizer_mode=server_args.tokenizer_mode,
        trust_remote_code=server_args.trust_remote_code,
    )


class BaseImageProcessor(ABC):
    @abstractmethod
    async def process_images_async(self, image_data, **kwargs):
        pass


class DummyImageProcessor(BaseImageProcessor):
    async def process_images_async(self, *args, **kwargs):
        return None


class LlavaImageProcessor(BaseImageProcessor):
    def __init__(self, hf_config, server_args, _image_processor):
        self.hf_config = hf_config
        self._image_processor = _image_processor
        self.executor = concurrent.futures.ProcessPoolExecutor(
            initializer=init_global_processor,
            mp_context=mp.get_context("fork"),
            initargs=(server_args,),
            max_workers=os.environ.get("SGLANG_CPU_COUNT", os.cpu_count()),
        )

    @staticmethod
    def _process_single_image_task(
        image_data: Union[str, bytes],
        image_aspect_ratio: Optional[str] = None,
        image_grid_pinpoints: Optional[str] = None,
        image_processor=None,
    ):
        image_processor = image_processor or global_processor.image_processor

        try:
            image, image_size = load_image(image_data)
            if image_size is not None:
                # It is a video with multiple images
                image_hash = hash(image_data)
                pixel_values = image_processor(image)["pixel_values"]
                for _ in range(len(pixel_values)):
                    pixel_values[_] = pixel_values[_].astype(np.float16)
                pixel_values = np.stack(pixel_values, axis=0)
                return pixel_values, image_hash, image_size
            else:
                # It is an image
                image_hash = hash(image_data)
                if image_aspect_ratio == "pad":
                    image = expand2square(
                        image,
                        tuple(int(x * 255) for x in image_processor.image_mean),
                    )
                    pixel_values = image_processor(image.convert("RGB"))[
                        "pixel_values"
                    ][0]
                elif image_aspect_ratio == "anyres" or (
                    image_aspect_ratio is not None
                    and "anyres_max" in image_aspect_ratio
                ):
                    pixel_values = process_anyres_image(
                        image, image_processor, image_grid_pinpoints
                    )
                else:
                    pixel_values = image_processor(image)["pixel_values"][0]

                if isinstance(pixel_values, np.ndarray):
                    pixel_values = pixel_values.astype(np.float16)

                return pixel_values, image_hash, image.size
        except Exception:
            logger.error("Exception in TokenizerManager:\n" + get_exception_traceback())

    async def _process_single_image(
        self, image_data: Union[bytes, str], aspect_ratio: str, grid_pinpoints: str
    ):
        if self.executor is not None:
            loop = asyncio.get_event_loop()
            return await loop.run_in_executor(
                self.executor,
                LlavaImageProcessor._process_single_image_task,
                image_data,
                aspect_ratio,
                grid_pinpoints,
            )
        else:
            return self._process_single_image_task(
                image_data, aspect_ratio, grid_pinpoints
            )

    async def process_images_async(
        self, image_data: List[Union[str, bytes]], request_obj
    ):
        if not image_data:
            return None

        aspect_ratio = getattr(self.hf_config, "image_aspect_ratio", None)
        grid_pinpoints = (
            self.hf_config.image_grid_pinpoints
            if hasattr(self.hf_config, "image_grid_pinpoints")
            and "anyres" in aspect_ratio
            else None
        )

        if isinstance(image_data, list) and len(image_data) > 0:
            # Multiple images
            if len(image_data) > 1:
                aspect_ratio = "pad"
                # LLaVA OneVision Handling: more than one image --> interleaved
                # image mode or video mode. We do not use anyres.
                pixel_values, image_hashes, image_sizes = [], [], []
                res = []
                for img_data in image_data:
                    res.append(
                        self._process_single_image(
                            img_data, aspect_ratio, grid_pinpoints
                        )
                    )
                res = await asyncio.gather(*res)
                for pixel_v, image_h, image_s in res:
                    pixel_values.append(pixel_v)
                    image_hashes.append(image_h)
                    image_sizes.append(image_s)

                if isinstance(pixel_values[0], np.ndarray):
                    pixel_values = np.stack(pixel_values, axis=0)
            else:
                # A single image
                pixel_values, image_hash, image_size = await self._process_single_image(
                    image_data[0], aspect_ratio, grid_pinpoints
                )
                image_hashes = [image_hash]
                image_sizes = [image_size]
        elif isinstance(image_data, str):
            # A single image
            pixel_values, image_hash, image_size = await self._process_single_image(
                image_data, aspect_ratio, grid_pinpoints
            )
            image_hashes = [image_hash]
            image_sizes = [image_size]
        else:
            raise ValueError(f"Invalid image data: {image_data}")

        return {
            "pixel_values": pixel_values,
            "image_hashes": image_hashes,
            "image_sizes": image_sizes,
            "modalities": request_obj.modalities,
        }


def get_image_processor(
    hf_config, server_args: ServerArgs, _image_processor
) -> BaseImageProcessor:
    return LlavaImageProcessor(hf_config, server_args, _image_processor)


def get_dummy_image_processor():
    return DummyImageProcessor()
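For orientation, here is a rough sketch of how the new module is meant to be driven. In the real code TokenizerManager constructs the processor and supplies hf_config, server_args, the HuggingFace image processor, and the request object, so the names below are placeholders rather than part of the commit:

# Hypothetical caller sketch (not part of the commit).
from sglang.srt.managers.image_processor import get_image_processor


async def encode_images(hf_config, server_args, hf_image_processor, image_data, request_obj):
    processor = get_image_processor(hf_config, server_args, hf_image_processor)
    # Returns None for empty input, otherwise a dict with "pixel_values",
    # "image_hashes", "image_sizes", and "modalities".
    return await processor.process_images_async(image_data, request_obj)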
python/sglang/srt/managers/tokenizer_manager.py
@@ -16,17 +16,13 @@ limitations under the License.
 """TokenizerManager is a process that tokenizes the text."""

 import asyncio
-import concurrent.futures
 import dataclasses
 import json
 import logging
-import multiprocessing as mp
 import os
 from typing import Dict, List, Optional, Tuple, Union

 import fastapi
-import numpy as np
-import transformers
 import uvloop
 import zmq
 import zmq.asyncio
@@ -38,6 +34,10 @@ from sglang.srt.hf_transformers_utils import (
     get_processor,
     get_tokenizer,
 )
+from sglang.srt.managers.image_processor import (
+    get_dummy_image_processor,
+    get_image_processor,
+)
 from sglang.srt.managers.io_struct import (
     AbortReq,
     BatchEmbeddingOut,
@@ -53,11 +53,9 @@ from sglang.srt.managers.io_struct import (
     UpdateWeightReqInput,
     UpdateWeightReqOutput,
 )
-from sglang.srt.mm_utils import expand2square, process_anyres_image
 from sglang.srt.sampling.sampling_params import SamplingParams
 from sglang.srt.server_args import PortArgs, ServerArgs
-from sglang.srt.utils import is_generation_model, is_multimodal_model, load_image
 from sglang.utils import get_exception_traceback
 from sglang.srt.utils import is_generation_model, is_multimodal_model

 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
@@ -105,6 +103,8 @@ class TokenizerManager:
         self.context_len = server_args.context_length or get_context_length(
             self.hf_config
         )
+        # Create image processor placeholder
+        self.image_processor = get_dummy_image_processor()

         # Create tokenizer
         if server_args.skip_tokenizer_init:
@@ -119,13 +119,9 @@ class TokenizerManager:
             self.tokenizer = self.processor.tokenizer
             os.environ["TOKENIZERS_PARALLELISM"] = "false"

-            # We want to parallelize the image pre-processing so we
-            # create an executor for it
-            self.executor = concurrent.futures.ProcessPoolExecutor(
-                initializer=init_global_processor,
-                mp_context=mp.get_context("fork"),
-                initargs=(server_args,),
-                max_workers=os.environ.get("SGLANG_CPU_COUNT", os.cpu_count()),
-            )
+            # We want to parallelize the image pre-processing so we create an executor for it
+            self.image_processor = get_image_processor(
+                self.hf_config, server_args, self.processor.image_processor
+            )
         else:
             self.tokenizer = get_tokenizer(
@@ -194,8 +190,8 @@
             )
         if self.is_generation:
-            image_inputs = await self._get_image_inputs(
-                obj, obj.image_data if not_use_index else obj.image_data[index]
-            )
+            image_inputs = await self.image_processor.process_images_async(
+                obj.image_data if not_use_index else obj.image_data[index], obj
+            )
             return_logprob = (
                 obj.return_logprob if not_use_index else obj.return_logprob[index]
             )
@@ -247,7 +243,9 @@
             sampling_params = SamplingParams(**obj.sampling_params[0])
             sampling_params.max_new_tokens = 0
-            image_inputs = await self._get_image_inputs(obj, obj.image_data[0])
+            image_inputs = await self.image_processor.process_images_async(
+                obj.image_data[0], obj
+            )
             return_logprob = obj.return_logprob[0]
             logprob_start_len = obj.logprob_start_len[0]
             top_logprobs_num = obj.top_logprobs_num[0]
@@ -362,8 +360,8 @@
                 sampling_params = self._get_sampling_params(obj.sampling_params[index])
                 if self.is_generation:
-                    image_inputs = await self._get_image_inputs(
-                        obj, obj.image_data[index]
-                    )
+                    image_inputs = await self.image_processor.process_images_async(
+                        obj.image_data[index], obj
+                    )
                     tokenized_obj = TokenizedGenerateReqInput(
@@ -686,131 +684,3 @@ class TokenizerManager:
                 token_top_logprobs, decode_to_text
             )
         return top_logprobs
-
-    async def _get_image_inputs(self, obj, image_data: List[Union[str, bytes]]):
-        if not image_data:
-            return None
-
-        # TODO: move this into a processor for each vision architecture
-        aspect_ratio = getattr(self.hf_config, "image_aspect_ratio", None)
-        grid_pinpoints = (
-            self.hf_config.image_grid_pinpoints
-            if hasattr(self.hf_config, "image_grid_pinpoints")
-            and "anyres" in aspect_ratio
-            else None
-        )
-
-        if isinstance(image_data, list) and len(image_data) > 0:
-            # Multiple images
-            if len(image_data) > 1:
-                aspect_ratio = "pad"
-                # LLaVA OneVision Handling: more than one image --> interleaved
-                # image mode or video mode. We do not use anyres.
-                pixel_values, image_hashes, image_sizes = [], [], []
-                for img_data in image_data:
-                    pixel_v, image_h, image_s = await self._process_single_image(
-                        img_data, aspect_ratio, grid_pinpoints
-                    )
-                    pixel_values.append(pixel_v)
-                    image_hashes.append(image_h)
-                    image_sizes.append(image_s)
-
-                if isinstance(pixel_values[0], np.ndarray):
-                    pixel_values = np.stack(pixel_values, axis=0)
-            else:
-                # A single image
-                pixel_values, image_hash, image_size = await self._process_single_image(
-                    image_data[0], aspect_ratio, grid_pinpoints
-                )
-                image_hashes = [image_hash]
-                image_sizes = [image_size]
-        elif isinstance(image_data, str):
-            # A single image
-            pixel_values, image_hash, image_size = await self._process_single_image(
-                image_data, aspect_ratio, grid_pinpoints
-            )
-            image_hashes = [image_hash]
-            image_sizes = [image_size]
-        else:
-            raise ValueError(f"Invalid image data: {image_data}")
-
-        return {
-            "pixel_values": pixel_values,
-            "image_hashes": image_hashes,
-            "image_sizes": image_sizes,
-            "modalities": obj.modalities,
-        }
-
-    async def _process_single_image(
-        self, image_data: Union[bytes, str], aspect_ratio: str, grid_pinpoints: str
-    ):
-        if self.executor is not None:
-            loop = asyncio.get_event_loop()
-            return await loop.run_in_executor(
-                self.executor,
-                _process_single_image_task,
-                image_data,
-                aspect_ratio,
-                grid_pinpoints,
-            )
-        else:
-            return _process_single_image_task(
-                image_data, aspect_ratio, grid_pinpoints, self.processor
-            )
-
-
-global global_processor
-
-
-def init_global_processor(server_args: ServerArgs):
-    """Init the global processor for multi modal models."""
-    global global_processor
-    transformers.logging.set_verbosity_error()
-    global_processor = get_processor(
-        server_args.tokenizer_path,
-        tokenizer_mode=server_args.tokenizer_mode,
-        trust_remote_code=server_args.trust_remote_code,
-    )
-
-
-def _process_single_image_task(
-    image_data: Union[str, bytes],
-    image_aspect_ratio: Optional[str] = None,
-    image_grid_pinpoints: Optional[str] = None,
-    processor=None,
-):
-    try:
-        processor = processor or global_processor
-        image, image_size = load_image(image_data)
-        if image_size is not None:
-            # It is a video with multiple images
-            image_hash = hash(image_data)
-            pixel_values = processor.image_processor(image)["pixel_values"]
-            for _ in range(len(pixel_values)):
-                pixel_values[_] = pixel_values[_].astype(np.float16)
-            pixel_values = np.stack(pixel_values, axis=0)
-            return pixel_values, image_hash, image_size
-        else:
-            # It is an image
-            image_hash = hash(image_data)
-            if image_aspect_ratio == "pad":
-                image = expand2square(
-                    image,
-                    tuple(int(x * 255) for x in processor.image_processor.image_mean),
-                )
-                pixel_values = processor.image_processor(image.convert("RGB"))[
-                    "pixel_values"
-                ][0]
-            elif image_aspect_ratio == "anyres" or (
-                image_aspect_ratio is not None and "anyres_max" in image_aspect_ratio
-            ):
-                pixel_values = process_anyres_image(
-                    image, processor.image_processor, image_grid_pinpoints
-                )
-            else:
-                pixel_values = processor.image_processor(image)["pixel_values"][0]
-
-            if isinstance(pixel_values, np.ndarray):
-                pixel_values = pixel_values.astype(np.float16)
-
-            return pixel_values, image_hash, image.size
-    except Exception:
-        logger.error("Exception in TokenizerManager:\n" + get_exception_traceback())
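Taken together, TokenizerManager now starts with a no-op placeholder and swaps in the process-pool-backed LlavaImageProcessor when the model's HuggingFace processor is available, then awaits process_images_async wherever it previously called _get_image_inputs. A simplified sketch of that selection; is_multimodal and hf_processor are stand-ins for the surrounding checks in the diff above, not sglang names:

from sglang.srt.managers.image_processor import (
    get_dummy_image_processor,
    get_image_processor,
)


def build_image_processor(hf_config, server_args, hf_processor, is_multimodal: bool):
    # Text-only models keep the dummy placeholder; multimodal models get the
    # ProcessPoolExecutor-backed LlavaImageProcessor.
    if not is_multimodal:
        return get_dummy_image_processor()
    return get_image_processor(hf_config, server_args, hf_processor.image_processor)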