Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
41d71ca4
"docs/source/en/api/image_processor.md" did not exist on "95ea538c7969b74f1da8971dfd3bfe3e794c96cc"
Unverified
Commit
41d71ca4
authored
Aug 10, 2025
by
Mick
Committed by
GitHub
Aug 09, 2025
Browse files
fix: fix obsolete qwen-audio processor arg (#9003)
parent
20cfc5a2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
12 deletions
+18
-12
python/sglang/srt/multimodal/processors/base_processor.py
python/sglang/srt/multimodal/processors/base_processor.py
+14
-10
python/sglang/srt/multimodal/processors/qwen_audio.py
python/sglang/srt/multimodal/processors/qwen_audio.py
+4
-2
No files found.
python/sglang/srt/multimodal/processors/base_processor.py
View file @
41d71ca4
...
@@ -208,7 +208,7 @@ class BaseMultimodalProcessor(ABC):
...
@@ -208,7 +208,7 @@ class BaseMultimodalProcessor(ABC):
def
process_mm_data
(
def
process_mm_data
(
self
,
input_text
,
images
=
None
,
videos
=
None
,
audios
=
None
,
**
kwargs
self
,
input_text
,
images
=
None
,
videos
=
None
,
audios
=
None
,
**
kwargs
):
)
->
dict
:
"""
"""
process multimodal data with transformers AutoProcessor
process multimodal data with transformers AutoProcessor
"""
"""
...
@@ -217,10 +217,14 @@ class BaseMultimodalProcessor(ABC):
...
@@ -217,10 +217,14 @@ class BaseMultimodalProcessor(ABC):
if
videos
:
if
videos
:
kwargs
[
"videos"
]
=
videos
kwargs
[
"videos"
]
=
videos
if
audios
:
if
audios
:
kwargs
[
"audios"
]
=
audios
if
self
.
arch
in
{
if
self
.
__class__
.
__name__
==
"Gemma3nSGLangProcessor"
:
"Gemma3nForConditionalGeneration"
,
"Qwen2AudioForConditionalGeneration"
,
}:
# Note(Xinyuan): for gemma3n, ref: https://github.com/huggingface/transformers/blob/ccf2ca162e33f381e454cdb74bf4b41a51ab976d/src/transformers/models/gemma3n/processing_gemma3n.py#L107
# Note(Xinyuan): for gemma3n, ref: https://github.com/huggingface/transformers/blob/ccf2ca162e33f381e454cdb74bf4b41a51ab976d/src/transformers/models/gemma3n/processing_gemma3n.py#L107
kwargs
[
"audio"
]
=
audios
kwargs
[
"audio"
]
=
audios
else
:
kwargs
[
"audios"
]
=
audios
processor
=
self
.
_processor
processor
=
self
.
_processor
if
(
if
(
...
@@ -607,12 +611,6 @@ class BaseMultimodalProcessor(ABC):
...
@@ -607,12 +611,6 @@ class BaseMultimodalProcessor(ABC):
all_collected_items
:
list
[
MultimodalDataItem
]
=
[]
all_collected_items
:
list
[
MultimodalDataItem
]
=
[]
input_ids
=
None
input_ids
=
None
# Handle dict items (already processed)
for
dict_item
in
dict_items
:
all_collected_items
.
extend
(
self
.
collect_mm_items_from_processor_output
(
dict_item
)
)
# Handle raw items (need processing)
# Handle raw items (need processing)
if
raw_images
or
raw_audios
or
raw_videos
:
if
raw_images
or
raw_audios
or
raw_videos
:
collected_items
,
input_ids
,
ret
=
self
.
_process_and_collect_mm_items
(
collected_items
,
input_ids
,
ret
=
self
.
_process_and_collect_mm_items
(
...
@@ -622,10 +620,16 @@ class BaseMultimodalProcessor(ABC):
...
@@ -622,10 +620,16 @@ class BaseMultimodalProcessor(ABC):
videos
=
raw_videos
,
videos
=
raw_videos
,
**
kwargs
,
**
kwargs
,
)
)
all_collected_items
.
extend
(
collected_items
)
all_collected_items
=
collected_items
else
:
else
:
ret
=
None
ret
=
None
# Handle dict items (already processed)
for
dict_item
in
dict_items
:
all_collected_items
.
extend
(
self
.
collect_mm_items_from_processor_output
(
dict_item
)
)
# Fallback tokenization if no raw items were processed
# Fallback tokenization if no raw items were processed
if
input_ids
is
None
:
if
input_ids
is
None
:
input_ids
=
self
.
_processor
.
tokenizer
(
input_ids
=
self
.
_processor
.
tokenizer
(
...
...
python/sglang/srt/multimodal/processors/qwen_audio.py
View file @
41d71ca4
import
re
import
re
from
sglang.srt.managers.schedule_batch
import
Modality
,
MultimodalDataItem
from
sglang.srt.managers.schedule_batch
import
Modality
from
sglang.srt.models.qwen2_audio
import
Qwen2AudioForConditionalGeneration
from
sglang.srt.models.qwen2_audio
import
Qwen2AudioForConditionalGeneration
from
sglang.srt.multimodal.processors.base_processor
import
(
from
sglang.srt.multimodal.processors.base_processor
import
(
BaseMultimodalProcessor
,
BaseMultimodalProcessor
,
...
@@ -29,6 +29,8 @@ class Qwen2AudioMultimodalProcessor(BaseMultimodalProcessor):
...
@@ -29,6 +29,8 @@ class Qwen2AudioMultimodalProcessor(BaseMultimodalProcessor):
audio_token_id
=
self
.
audio_token_id
,
audio_token_id
=
self
.
audio_token_id
,
).
build
(
_processor
)
).
build
(
_processor
)
self
.
ATTR_NAME_TO_MODALITY
.
update
({
"feature_attention_mask"
:
Modality
.
AUDIO
})
async
def
process_mm_data_async
(
async
def
process_mm_data_async
(
self
,
self
,
audio_data
,
audio_data
,
...
@@ -54,7 +56,7 @@ class Qwen2AudioMultimodalProcessor(BaseMultimodalProcessor):
...
@@ -54,7 +56,7 @@ class Qwen2AudioMultimodalProcessor(BaseMultimodalProcessor):
input_lengths
=
(
input_lengths
-
1
)
//
2
+
1
input_lengths
=
(
input_lengths
-
1
)
//
2
+
1
output_lengths
=
(
input_lengths
-
2
)
//
2
+
1
output_lengths
=
(
input_lengths
-
2
)
//
2
+
1
mm_items
[
0
].
model_specific_data
[
"
audio_feature_lens
"
]
=
output_lengths
mm_items
[
0
].
audio_feature_lens
=
output_lengths
return
{
return
{
"mm_items"
:
mm_items
,
"mm_items"
:
mm_items
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment